Beispiel #1
0
    def _parse_row_data(self, atdm_row, subject, table_metadata, property_url,
                        row_node, atdm_table):
        top_level_property_url = property_url
        atdm_columns = atdm_table['columns']
        for index, entry in enumerate(atdm_row['cells'].items()):
            col_name, values = entry
            for col_metadata in atdm_columns:
                if col_metadata['name'] == col_name:
                    break
            if col_metadata.get('suppressOutput', False):
                continue
            property_url = col_metadata.get('propertyUrl',
                                            top_level_property_url)
            if 'aboutUrl' in col_metadata:
                subject = UriTemplateUtils.insert_value_rdf(
                    col_metadata['aboutUrl'], atdm_row, col_name,
                    table_metadata['url'])
                if self.mode == CONST_STANDARD_MODE:
                    self.graph.add((row_node, CSVW.describes, subject))

            property_namespace = PropertyUrlUtils.create_namespace(
                property_url, table_metadata['url'])
            predicate = self._predicate_node(property_namespace, property_url,
                                             col_name)
            self._parse_cell_values(values, col_metadata, subject, predicate)
Beispiel #2
0
 def parse_virtual_columns(self, row_node, atdm_row, table_metadata):
     """Add one triple to ``self.graph`` for each virtual column.

     Columns of ``table_metadata['tableSchema']['columns']`` that have no
     ``virtual`` entry, or whose ``virtual`` entry is ``False``, are
     skipped. For the others the ``aboutUrl``, ``propertyUrl`` and
     ``valueUrl`` entries are resolved into subject, predicate and object.
     In standard mode the row node is additionally linked to the subject
     through ``CSVW.describes``.
     """
     for column in table_metadata['tableSchema']['columns']:
         if column.get('virtual', False) is False:
             continue

         about = UriTemplateUtils.insert_value(
             column['aboutUrl'], atdm_row, '', table_metadata['url'])
         subject = URIRef(about)
         predicate = Namespaces.get_term(column['propertyUrl'])
         target = CommonProperties.expand_property_if_possible(
             UriTemplateUtils.insert_value(
                 column['valueUrl'], atdm_row, '', table_metadata['url']))

         self.graph.add((subject, predicate, URIRef(target)))
         if self.mode == CONST_STANDARD_MODE:
             self.graph.add((row_node, CSVW.describes, subject))
Beispiel #3
0
 def _property(col_name, atdm_row, atdm_table, column_metadata):
     if 'propertyUrl' in column_metadata:
         resolved = UriTemplateUtils.insert_value(
             column_metadata['propertyUrl'], atdm_row, col_name,
             atdm_table['url'])
         return Namespaces.replace_url_with_prefix(resolved)
     else:
         return column_metadata['name']
Beispiel #4
0
 def _supply_subjects(self, atdm_table, atdm_row, subjects):
     """Register a subject entry for every column that defines ``aboutUrl``.

     Cells and column descriptions are paired by position. For each pair
     whose column has an ``aboutUrl``, the template is resolved and
     ``subjects`` receives ``{'@id': <resolved>}`` under the resolved id.
     Afterwards the virtual columns are processed through
     ``self._handle_virtual_columns``.

     NOTE(review): pairing is positional (``zip``), so it relies on the
     cell order matching the column order - confirm against the caller.
     """
     cells = atdm_row['cells'].items()
     columns = atdm_table['columns']
     for (col_name, _values), column_metadata in zip(cells, columns):
         if 'aboutUrl' in column_metadata:
             subject_id = UriTemplateUtils.insert_value(
                 column_metadata['aboutUrl'],
                 atdm_row,
                 col_name,
                 atdm_table['url'],
             )
             subjects[subject_id] = {'@id': subject_id}

     self._handle_virtual_columns(atdm_table, atdm_row, '', subjects)
Beispiel #5
0
 def _value(col_name, values, atdm_row, atdm_table, column_metadata):
     if 'valueUrl' in column_metadata:
         value_url = CommonProperties.expand_property_if_possible(
             column_metadata['valueUrl'])
         return UriTemplateUtils.insert_value(value_url, atdm_row, col_name,
                                              atdm_table['url'])
     else:
         if len(values) == 1:
             return values[0]
         else:
             return list(map(lambda value: value, values))
Beispiel #6
0
 def _handle_virtual_columns(atdm_table, atdm_row, col_name, subjects):
     for column_metadata in reversed(atdm_table['columns']):
         if 'virtual' not in column_metadata or column_metadata[
                 'virtual'] is False:
             break
         subject_id = UriTemplateUtils.insert_value(
             column_metadata['aboutUrl'], atdm_row, col_name,
             atdm_table['url'])
         subject = subjects[subject_id]
         if column_metadata['propertyUrl'] == 'rdf:type':
             subject['@type'] = column_metadata['valueUrl']
         else:
             prop = UriTemplateUtils.insert_value(
                 column_metadata['propertyUrl'], atdm_row, col_name,
                 atdm_table['url'])
             subject_id = UriTemplateUtils.insert_value(
                 column_metadata['valueUrl'], atdm_row, col_name,
                 atdm_table['url'])
             subject[prop] = {'@id': subject_id}
             subjects[subject_id] = subject[prop]
Beispiel #7
0
    def _normalize_inherited_property(self, property_name):
        for table in self.metadata['tables']:
            property_value = self.metadata.get(property_name, None)
            property_value = table.get(property_name, property_value)
            property_value = table['tableSchema'].get(property_name, property_value)

            if property_value is not None:
                for column in table['tableSchema']['columns']:
                    if property_name not in column:
                        column[property_name] = property_value
                    if property_name in self._uri_template_properties:
                        column[property_name] = UriTemplateUtils.expand(column[property_name], table['url'])
Beispiel #8
0
    def _parse_row(self, atdm_table, atdm_row):
        """Build the output mapping for one row.

        Args:
            atdm_table: Table mapping; ``'columns'`` and ``'url'`` are read.
            atdm_row: Row mapping; ``'number'``, ``'@id'`` and ``'cells'``
                are read.

        Returns:
            A dict with ``'rownum'``, ``'url'``, whatever
            ``copy_notes_and_non_core`` / ``_add_row_titles`` add to it,
            and ``'describes'``: a list of the subject dicts built from the
            row's cells.
        """
        row = {'rownum': atdm_row['number'], 'url': atdm_row['@id']}
        self.copy_notes_and_non_core(atdm_row, row)
        self._add_row_titles(row, atdm_row, atdm_table)
        described_row = {}
        # The '' key collects cells whose column has no aboutUrl.
        subjects = {'': described_row}
        self._supply_subjects(atdm_table, atdm_row, subjects)

        for col_name, values in atdm_row['cells'].items():
            # NOTE(review): when no column carries this name the scan ends
            # without a break and the last column's metadata is used.
            for column_metadata in atdm_table['columns']:
                if column_metadata['name'] == col_name:
                    break
            about_url = ''
            if 'aboutUrl' in column_metadata:
                about_url = UriTemplateUtils.insert_value(
                    column_metadata['aboutUrl'], atdm_row, col_name,
                    atdm_table['url'])
            subject = subjects[about_url]
            if column_metadata.get('suppressOutput', False):
                continue
            value = self._value(col_name, values, atdm_row, atdm_table,
                                column_metadata)
            prop = self._property(col_name, atdm_row, atdm_table,
                                  column_metadata)
            if prop not in subject:
                subject[prop] = value
                continue

            # The property already has a value: collect everything in one
            # list. Any non-list value (str, number, bool, dict, ...) is
            # treated as a single item, so scalars are neither iterated
            # nor called with list methods they do not have.
            collected = subject[prop]
            if not isinstance(collected, list):
                collected = [collected]
                subject[prop] = collected
            if isinstance(value, (list, tuple)):
                collected.extend(value)
            else:
                collected.append(value)

        if len(subjects) > 1:
            ordered = list(subjects.items())
            # The first subject after the '' entry is merged into the
            # row-level description.
            described_row = {**described_row, **ordered[1][1]}
            row['describes'] = [described_row]
            for subject_id, obj in ordered[2:]:
                # Subjects already referenced by '@id' from the merged
                # description are not listed a second time.
                referenced = any(
                    isinstance(candidate, dict)
                    and candidate.get('@id') == subject_id
                    for candidate in described_row.values())
                if not referenced:
                    row['describes'].append(obj)
        else:
            row['describes'] = [described_row]
        return row
Beispiel #9
0
    def create_namespace(property_url, domain_url=''):
        """Derive the namespace for a property URL.

        Args:
            property_url: Property URL, compact ``prefix:term`` form, or a
                URI template containing ``{_name}`` / ``{#_name}``.
            domain_url: Passed to ``UriTemplateUtils.expand`` together with
                ``property_url``.

        Returns:
            ``Namespaces.get(prefix)`` for the compact form; a ``Namespace``
            of the URL when it contains no template; a ``Namespace`` of the
            URL with the ``_name`` variable removed (keeping a ``#`` for the
            fragment form) when that is the first template variable; and
            ``None`` for any other template variable.
        """
        property_url = UriTemplateUtils.expand(property_url, domain_url)
        if ':' in property_url and '://' not in property_url:
            # Split on the first colon only: a further colon in the local
            # part must not make the unpacking fail.
            prefix, _, _ = property_url.partition(':')
            return Namespaces.get(prefix)
        if '{' not in property_url:
            return Namespace(property_url)

        # Name of the first template variable, without the braces.
        property_key = property_url[property_url.find('{') +
                                    1:property_url.find('}')]
        prefix = ''
        if property_key.startswith('#'):
            # Fragment form '{#_name}': drop the '#' from the template and
            # put it back in place of the variable below.
            property_key = property_key[1:]
            property_url = property_url.replace('#', '')
            prefix = '#'
        if property_key == '_name':
            namespace_url = property_url.replace('{_name}', prefix)
            return Namespace(namespace_url)
        # Other template variables yield no namespace.
        return None