Exemple #1
0
    def _ExpandCodeList(self, dim):
        """Load a dimension's code list from CSV and return JSON-LD objects.

        Every CSV row becomes one entry typed as ``DimensionValue`` (plus the
        dimension's ``equivalentType`` values, when set) whose ``@id`` is the
        dimension ID followed by ``=`` and the row's ``codeValue``.

        Columns are then reshaped according to the dimension's
        ``dimensionProperty`` entries:

        * a property carrying a truthy ``value`` is set to that value on the
          entry and no column is consulted for it;
        * a column selected through a ``tableMapping`` ``columnIdentifier`` is
          renamed to the property ID;
        * columns named ``<column>.<sub>`` are folded into a nested object
          stored under ``<column>`` and typed with the property's
          ``propertyType``.

        Args:
            dim: JSON-LD object describing the dimension; its ``codeList``
                property is handed to ``self.getter.Fetch`` to obtain the CSV.

        Returns:
            A list with one dict per CSV row, in file order.

        Raises:
            KeyError: if the rows have no ``codeValue`` column.
        """
        codeList = []
        dimProps = list(AsList(GetSchemaProp(dim, 'dimensionProperty')))
        # Table mappings are looked up by the URL of the entity they describe.
        tableMappings = {
            GetUrl(tableMapping['sourceEntity']): tableMapping
            for tableMapping in AsList(GetSchemaProp(dim, 'tableMapping'))
        }
        # These do not depend on the row, so resolve them once.
        dimId = GetSchemaId(dim)
        equivalentType = GetSchemaProp(dim, 'equivalentType')

        with self.getter.Fetch(GetSchemaProp(dim, 'codeList')) as f:
            for row in DictReader(f):
                # Reshape a copy so that ``row`` can still be iterated and
                # still holds the untouched CSV values.
                entry = dict(row)
                if equivalentType:
                    entry['@type'] = (
                        ['DimensionValue'] + list(AsList(equivalentType)))
                else:
                    entry['@type'] = 'DimensionValue'
                entry['@id'] = dimId + '=' + row['codeValue']
                entry['dimension'] = dimId
                for dimProp in dimProps:
                    propId = GetSchemaProp(dimProp, 'propertyID')
                    value = dimProp.get('value')
                    if not propId:
                        continue
                    if value:
                        # Constant property: same value on every entry.
                        entry[dimProp['propertyID']] = value
                        continue
                    # The column defaults to the property ID unless a table
                    # mapping names a different column for this property.
                    columnId = propId
                    dimPropId = GetSchemaId(dimProp)
                    if dimPropId:
                        tableMapping = tableMappings.get(dimPropId)
                        if tableMapping and 'columnIdentifier' in tableMapping:
                            columnId = tableMapping['columnIdentifier']
                    for field in row:
                        if field == columnId:
                            if columnId != propId:
                                # Rename the mapped column to the property ID.
                                entry[propId] = entry.pop(columnId)
                        elif field.startswith(columnId + '.'):
                            entry[columnId] = entry.get(
                                columnId,
                                {'@type': dimProp['propertyType']})
                            if isinstance(entry[columnId], str):
                                # The plain column value becomes the nested
                                # object's name. The lookup must use the
                                # column held in ``columnId``, not the
                                # literal key 'columnId'.
                                entry[columnId] = {
                                    '@type': dimProp['propertyType'],
                                    'name': row[columnId]
                                }
                            entry[columnId][field[len(columnId) +
                                                  1:]] = entry[field]
                            del entry[field]
                codeList.append(entry)
        return codeList
Exemple #2
0
 def _ExpandFootnotes(self, filename, json_val):
   """Read the footnotes CSV and turn every row into a JSON-LD annotation.

   Each row is tagged as a ``StatisticalAnnotation``, receives an ``@id`` made
   of the ID of ``json_val``, the text ``#footnote=`` and the row's
   ``codeValue``, and points back to ``json_val`` through ``dataset``.
   The rows are returned in file order.
   """
   with self.getter.Fetch(filename) as csv_file:
     annotations = []
     for record in DictReader(csv_file):
       record['@type'] = 'StatisticalAnnotation'
       record['@id'] = (
           GetSchemaId(json_val) + '#footnote=' + record['codeValue'])
       record['dataset'] = GetSchemaId(json_val)
       annotations.append(record)
   return annotations
Exemple #3
0
def CheckDimension(warnings, dim, dsid):
    """Validate one dimension definition, reporting problems via ``warnings``.

    Runs the ID check, the type check (``TimeDimension`` or
    ``CategoricalDimension``) and the ``dataset`` URL check. A
    ``TimeDimension`` is additionally checked for ``dateFormat`` and a
    ``CategoricalDimension`` for ``codeList``.

    Args:
        warnings: collection of warning messages, passed on to the
            individual checks.
        dim: JSON-LD object describing the dimension.
        dsid: ID of the owning dataset, forwarded to ``_CheckUrlPresent``.
    """
    _CheckIdPresent(warnings, 'Dimension', dim)
    _CheckType(warnings, 'Dimension', dim,
               ['TimeDimension', 'CategoricalDimension'])
    # Formatted rather than concatenated: a dimension without an "@id" has
    # already been reported above and must not abort validation with a
    # TypeError (str + None).
    required_msg = f'required for id {GetSchemaId(dim)}'
    _CheckUrlPresent(warnings, 'Dimension', dim, 'dataset', required_msg,
                     dsid)
    dim_type = GetSchemaType(dim)
    if dim_type == 'TimeDimension':
        _CheckPropertyPresent(warnings, 'Dimension', dim, 'dateFormat',
                              required_msg)
    elif dim_type == 'CategoricalDimension':
        _CheckPropertyPresent(warnings, 'Dimension', dim, 'codeList',
                              required_msg)
Exemple #4
0
def CheckStatisticalDataset(warnings, dataset):
    """Validate a StatisticalDataset and everything it contains.

    After the type and ID checks, each of the ``dimension``, ``measure`` and
    ``slice`` properties is checked for presence (as 'required') and every
    value it holds is handed to the matching ``Check*`` function together
    with the dataset's ID.
    """
    kind = 'StatisticalDataset'
    _CheckType(warnings, kind, dataset, [kind])
    _CheckIdPresent(warnings, kind, dataset)

    # Property name -> validator for each value of that property, in the
    # order the checks are run.
    child_checks = (
        ('dimension', CheckDimension),
        ('measure', CheckMeasure),
        ('slice', CheckSlice),
    )
    for prop_name, check_child in child_checks:
        _CheckPropertyPresent(warnings, kind, dataset, prop_name, 'required')
        for child in AsList(GetSchemaProp(dataset, prop_name)):
            check_child(warnings, child, GetSchemaId(dataset))
Exemple #5
0
 def _ExpandCodeList(self, dim):
   """Load a code list from CSV and return a list of JSON-LD objects.

   Every CSV row is returned as a dict extended with:

   * ``@type``: ``'DimensionValue'``, or a flat list starting with
     ``'DimensionValue'`` followed by the dimension's ``equivalentType``
     value(s) when that property is set;
   * ``@id``: the dimension ID, ``=`` and the row's ``codeValue``;
   * ``dimension``: the dimension ID.

   Args:
     dim: JSON-LD object describing the dimension; its ``codeList`` property
       is handed to ``self.getter.Fetch`` to obtain the CSV.

   Returns:
     A list with one dict per CSV row, in file order.
   """
   codeList = []
   dim_id = GetSchemaId(dim)
   extra_types = GetSchemaProp(dim, 'equivalentType')
   # ``equivalentType`` may hold one type or a list of types; normalise to a
   # list so that ``@type`` never ends up with a list nested inside it.
   if extra_types and not isinstance(extra_types, list):
     extra_types = [extra_types]
   with self.getter.Fetch(GetSchemaProp(dim, 'codeList')) as f:
     for row in DictReader(f):
       if extra_types:
         # Concatenation builds a fresh list per row.
         row['@type'] = ['DimensionValue'] + extra_types
       else:
         row['@type'] = 'DimensionValue'
       row['@id'] = dim_id + '=' + row['codeValue']
       row['dimension'] = dim_id
       codeList.append(row)
   return codeList
Exemple #6
0
def CheckMeasure(warnings, measure, dsid):
    """Validate one measure definition, reporting problems via ``warnings``.

    Runs the ID check, the type check (``StatisticalMeasure``) and the
    ``dataset`` URL check, then passes ``unitType`` / ``unitText`` to
    ``_CheckAnyPropertyPresent`` as 'recommended'.

    Args:
        warnings: collection of warning messages, passed on to the
            individual checks.
        measure: JSON-LD object describing the measure.
        dsid: ID of the owning dataset, forwarded to ``_CheckUrlPresent``.
    """
    _CheckIdPresent(warnings, 'Measure', measure)
    _CheckType(warnings, 'Measure', measure, ['StatisticalMeasure'])
    # Formatted rather than concatenated: a measure without an "@id" has
    # already been reported above and must not abort validation with a
    # TypeError (str + None).
    _CheckUrlPresent(warnings, 'Measure', measure, 'dataset',
                     f'required for id {GetSchemaId(measure)}', dsid)
    _CheckAnyPropertyPresent(warnings, 'Measure', measure,
                             ['unitType', 'unitText'], 'recommended')
Exemple #7
0
def CheckSlice(warnings, slice, dsid):
    """Validate one slice definition, reporting problems via ``warnings``.

    Runs the ID check, the type check (``DataSlice``) and the ``dataset`` URL
    check. The ``dimension``, ``measure`` and ``data`` properties are each
    checked for presence as 'required'. Every ``dimension`` and ``measure``
    value for which ``GetUrl`` returns ``None`` adds a warning. ``data``
    given as a dict, or as a list of items, is passed to ``CheckSliceData``;
    any other kind of ``data`` value (e.g. a string) is not inspected
    further.

    Args:
        warnings: list that warning messages are appended to.
        slice: JSON-LD object describing the slice.
        dsid: ID of the owning dataset, forwarded to ``_CheckUrlPresent``.
    """
    _CheckIdPresent(warnings, 'Slice', slice)
    slice_id = GetSchemaId(slice)
    _CheckType(warnings, 'Slice', slice, ['DataSlice'])
    # Formatted rather than concatenated: a slice without an "@id" has
    # already been reported above and must not abort validation with a
    # TypeError (str + None).
    _CheckUrlPresent(warnings, 'Slice', slice, 'dataset',
                     f'required for id {slice_id}', dsid)

    # Dimensions and measures follow the same rule: the property is required
    # and each of its values must resolve to a URL.
    for prop_name in ('dimension', 'measure'):
        _CheckPropertyPresent(warnings, 'Slice', slice, prop_name, 'required')
        for ref in AsList(GetSchemaProp(slice, prop_name)):
            if GetUrl(ref) is None:
                warnings.append(
                    f'Slice property "{prop_name}" values must have URLs '
                    f'for {slice_id}'
                )

    _CheckPropertyPresent(warnings, 'Slice', slice, 'data', 'required')
    data = GetSchemaProp(slice, 'data')
    if isinstance(data, dict):
        CheckSliceData(warnings, data, slice_id)
    elif isinstance(data, list):
        for datum in data:
            CheckSliceData(warnings, datum, slice_id)
Exemple #8
0
  def _ExpandSliceData(self, slice, dim_defs_by_id):
    """Load a slice's observations from CSV and return JSON-LD objects.

    Every CSV row becomes one ``Observation`` holding a ``DimensionValue``
    per slice dimension and a ``MeasureValue`` per slice measure. The CSV
    column for a dimension or measure is the fragment of its URL. A column
    named ``<fragment>*`` holds ``;``-separated footnote codes for the
    measure in ``<fragment>``.

    Args:
      slice: JSON-LD object describing the slice; its ``data`` property is
        handed to ``self.getter.Fetch`` to obtain the CSV.
      dim_defs_by_id: mapping from dimension URL to the dimension's
        definition. A dimension without a definition still produces a
        ``DimensionValue``, but one that carries no value.

    Returns:
      A list with one observation dict per CSV row, in file order.
    """
    data = []
    with self.getter.Fetch(GetSchemaProp(slice, 'data')) as f:
      for row in DictReader(f):
        val = {
            '@type': 'Observation',
            'slice': GetSchemaId(slice),
            'dimensionValues': [],
            'measureValues': [],
        }
        for dim in AsList(GetSchemaProp(slice, 'dimension')):
          dim = GetUrl(dim)
          fragment = urlparse(dim).fragment
          dim_val = {
              '@type': 'DimensionValue',
              'dimension': dim,
          }
          dim_def = dim_defs_by_id.get(dim)
          if dim_def:
            dim_type = GetSchemaProp(dim_def, '@type')
            if dim_type == 'CategoricalDimension':
              dim_val['codeValue'] = row[fragment]
            elif dim_type == 'TimeDimension':
              equivalent_type = GetSchemaProp(dim_def, 'equivalentType')
              if equivalent_type:
                dim_val['value'] = {
                    '@type': equivalent_type,
                    '@value': row[fragment]
                }
              else:
                # Store the value on this dimension's own object. It has
                # not been appended yet, so indexing the list with [-1]
                # would hit the previous dimension (or raise IndexError
                # for the first one).
                dim_val['value'] = row[fragment]
          val['dimensionValues'].append(dim_val)

        for measure in AsList(GetSchemaProp(slice, 'measure')):
          measure = GetUrl(measure)
          fragment = urlparse(measure).fragment
          measure_val = {
              '@type': 'MeasureValue',
              'measure': measure,
              'value': row[fragment]
          }
          footnote_codes = row.get(fragment + '*')
          if footnote_codes:
            measure_val['footnote'] = [
                {
                    '@type': 'StatisticalAnnotation',
                    'codeValue': footnote
                }
                for footnote in footnote_codes.split(';')
            ]
          val['measureValues'].append(measure_val)
        data.append(val)
    return data
Exemple #9
0
def _CheckIdPresent(warnings, name, obj):
    """Append a warning labelled with ``name`` when ``obj`` has no ID."""
    if GetSchemaId(obj) is not None:
        return
    warnings.append(f'{name} has no "@id"')
Exemple #10
0
 def test_GetSchemaId(self):
   # The ID must be found under each of the accepted spellings of the key.
   for id_key in ('@id', 'id', 'schema:id'):
     self.assertEqual(GetSchemaId({id_key: 'val'}), 'val')
Exemple #11
0
    def _ExpandSliceData(self, slice, dim_defs_by_id, meas_defs_by_id):
        """Load a slice's observations from CSV and return JSON-LD objects.

        Every CSV row becomes one ``Observation`` holding a
        ``DimensionValue`` per slice dimension and a ``MeasureValue`` per
        slice measure. The CSV column for a dimension or measure is the
        ``columnIdentifier`` of the slice's ``tableMapping`` whose
        ``sourceEntity`` is that dimension/measure, or otherwise the fragment
        of its URL. A column named ``<column>*`` holds ``;``-separated
        footnote codes for the measure in ``<column>``.

        Args:
            slice: JSON-LD object describing the slice; its ``data`` property
                is handed to ``self.getter.Fetch`` to obtain the CSV.
            dim_defs_by_id: mapping from dimension URL to the dimension's
                definition.
            meas_defs_by_id: mapping from measure URL to the measure's
                definition. Kept for interface compatibility; nothing in
                this method depends on it.

        Returns:
            A list with one observation dict per CSV row, in file order.

        Raises:
            RuntimeError: if a row is processed for a dimension that has no
                entry in ``dim_defs_by_id``.
        """
        # Table mappings are looked up by the URL of the entity they describe.
        tableMappings = {
            GetUrl(tableMapping['sourceEntity']): tableMapping
            for tableMapping in AsList(GetSchemaProp(slice, 'tableMapping'))
        }

        def column_for(entity_url):
            """Return the CSV column holding the values of ``entity_url``."""
            tableMapping = tableMappings.get(entity_url)
            # A mapping without a column identifier falls back to the URL
            # fragment, the same way _ExpandCodeList falls back to its
            # default column.
            if tableMapping and 'columnIdentifier' in tableMapping:
                return tableMapping['columnIdentifier']
            return urlparse(entity_url).fragment

        # None of these depend on the row, so resolve them once.
        slice_id = GetSchemaId(slice)
        dims = AsList(GetSchemaProp(slice, 'dimension'))
        measures = AsList(GetSchemaProp(slice, 'measure'))

        data = []
        with self.getter.Fetch(GetSchemaProp(slice, 'data')) as f:
            for row in DictReader(f):
                val = {
                    '@type': 'Observation',
                    'slice': slice_id,
                    'dimensionValue': [],
                    'measureValue': [],
                }
                for dim in dims:
                    dim = GetUrl(dim)
                    dim_def = dim_defs_by_id.get(dim)
                    if dim_def is None:
                        # Formatted so that a dimension without a URL still
                        # yields this error instead of a TypeError.
                        raise RuntimeError(
                            f"Unable to find definition for dimension {dim}")
                    col_id = column_for(dim)
                    dim_val = {
                        '@type': 'DimensionValue',
                        'dimension': dim,
                    }
                    # An empty definition is not None but is still falsy;
                    # such a dimension gets no value.
                    if dim_def:
                        dim_type = GetSchemaProp(dim_def, '@type')
                        if dim_type == 'CategoricalDimension':
                            dim_val['codeValue'] = row[col_id]
                        elif dim_type == 'TimeDimension':
                            equivalent_type = GetSchemaProp(
                                dim_def, 'equivalentType')
                            if equivalent_type:
                                dim_val['value'] = {
                                    '@type': equivalent_type,
                                    '@value': row[col_id]
                                }
                            else:
                                dim_val['value'] = row[col_id]
                    val['dimensionValue'].append(dim_val)

                for measure in measures:
                    measure = GetUrl(measure)
                    col_id = column_for(measure)
                    measure_val = {
                        '@type': 'MeasureValue',
                        'measure': measure,
                        'value': row[col_id]
                    }
                    footnote_codes = row.get(col_id + '*')
                    if footnote_codes:
                        measure_val['footnote'] = [{
                            '@type': 'StatisticalAnnotation',
                            'codeValue': footnote
                        } for footnote in footnote_codes.split(';')]
                    val['measureValue'].append(measure_val)
                data.append(val)
        return data