コード例 #1
0
ファイル: test_dataset.py プロジェクト: glottobank/pycldf
def test_add_table(tmpdir, ds):
    """Adding tables/columns enforces uniqueness and data validity."""
    ds.add_table('stuff.csv', term_uri('id'), 'col1')
    metadata_path = str(tmpdir / 't.json')
    ds.write(fname=metadata_path, **{'stuff.csv': [{'ID': '.ab'}]})
    # The written ID value is expected to fail validation:
    with pytest.raises(ValueError):
        ds.validate()
    ds['stuff.csv', 'ID'].name = 'nid'
    # Re-adding a column with the "id" property must be rejected ...
    with pytest.raises(ValueError):
        ds.add_columns('stuff.csv', term_uri('id'))
    # ... and so must a column whose name already exists.
    with pytest.raises(ValueError):
        ds.add_columns('stuff.csv', 'col1')
コード例 #2
0
def test_add_table(tmpdir, ds):
    # Adding a table with an ID column and writing a row whose ID value is
    # invalid must make validation fail.
    ds.add_table('stuff.csv', term_uri('id'), 'col1')
    ds.write(fname=str(tmpdir / 't.json'), **{'stuff.csv': [{'ID': '.ab'}]})
    with pytest.raises(ValueError):
        ds.validate()
    ds['stuff.csv', 'ID'].name = 'nid'
    # Re-adding a column with the "id" property must be rejected ...
    with pytest.raises(ValueError):
        ds.add_columns('stuff.csv', term_uri('id'))
    # ... as must a column whose name already exists.
    with pytest.raises(ValueError):
        ds.add_columns('stuff.csv', 'col1')
コード例 #3
0
 def _auto_foreign_keys(self, table, component=None, table_type=None):
     """
     Add foreign key constraints to `table` for its CLDF reference columns.

     If `component` (with its `table_type`) is given, only keys targeting
     that component are added; otherwise keys are added for every reference
     column whose referenced component exists in the dataset.
     """
     # `component` and `table_type` must be passed together, or not at all.
     assert (component is None) == (table_type is None)
     for col in table.tableSchema.columns:
         if col.propertyUrl and col.propertyUrl.uri in TERMS.by_uri:
             # `references` names the component this property points to.
             ref_name = TERMS.by_uri[col.propertyUrl.uri].references
             if (component is None and not ref_name) or \
                     (component is not None and ref_name != table_type):
                 continue
             # Skip columns that already carry a foreign key constraint.
             if any(fkey.columnReference == [col.name]
                    for fkey in table.tableSchema.foreignKeys):
                 continue
             if component is None:
                 # Let's see whether we have the component this column references:
                 try:
                     ref = self[ref_name]
                 except KeyError:
                     continue
             else:
                 ref = component
             # Target the referenced table's "id" column, falling back to the
             # conventional name 'ID' if none is declared.
             idcol = ref.get_column(term_uri('id'))
             table.tableSchema.foreignKeys.append(ForeignKey.fromdict(dict(
                 columnReference=col.name,
                 reference=dict(
                     resource=ref.url.string,
                     columnReference=idcol.name if idcol is not None else 'ID'))))
コード例 #4
0
    def auto_constraints(self, component=None):
        """
        Use CLDF reference properties to implicitly create foreign key constraints.

        Without a `component`, the method is applied recursively to every
        table of the dataset.

        :param component: A Table object or `None`.
        """
        if not component:
            # No specific table given: apply to all tables in turn.
            for table in self.tables:
                self.auto_constraints(table)
            return

        if not component.tableSchema.primaryKey:
            # Promote the column carrying the CLDF "id" property to primary key.
            idcol = component.get_column(term_uri('id'))
            if idcol:
                component.tableSchema.primaryKey = [idcol.name]

        self._auto_foreign_keys(component)

        try:
            table_type = self.get_tabletype(component)
        except ValueError:
            # New component is not a known CLDF term, so cannot add components
            # automatically. TODO: We might be able to infer some based on
            # `xxxReference` column properties?
            return

        # auto-add foreign keys targeting the new component:
        for table in self.tables:
            self._auto_foreign_keys(table, component=component, table_type=table_type)
コード例 #5
0
ファイル: dataset.py プロジェクト: glottobank/pycldf
 def _auto_foreign_keys(self, table, component=None, table_type=None):
     """
     Add foreign key constraints to `table` for its CLDF reference columns.

     If `component` (with its `table_type`) is given, only keys targeting
     that component are added; otherwise keys are added for every reference
     column whose referenced component exists in the dataset.
     """
     # `component` and `table_type` must be passed together, or not at all.
     assert (component is None) == (table_type is None)
     for col in table.tableSchema.columns:
         if col.propertyUrl and col.propertyUrl.uri in TERMS.by_uri:
             # `references` names the component this property points to.
             ref_name = TERMS.by_uri[col.propertyUrl.uri].references
             if (component is None and not ref_name) or \
                     (component is not None and ref_name != table_type):
                 continue
             # Skip columns that already carry a foreign key constraint.
             if any(fkey.columnReference == [col.name]
                    for fkey in table.tableSchema.foreignKeys):
                 continue
             if component is None:
                 # Let's see whether we have the component this column references:
                 try:
                     ref = self[ref_name]
                 except KeyError:
                     continue
             else:
                 ref = component
             # Target the referenced table's "id" column, falling back to the
             # conventional name 'ID' if none is declared.
             idcol = ref.get_column(term_uri('id'))
             table.tableSchema.foreignKeys.append(ForeignKey.fromdict(dict(
                 columnReference=col.name,
                 reference=dict(
                     resource=ref.url.string,
                     columnReference=idcol.name if idcol is not None else 'ID'))))
コード例 #6
0
ファイル: dataset.py プロジェクト: glottobank/pycldf
    def auto_constraints(self, component=None):
        """
        Use CLDF reference properties to implicitly create foreign key constraints.

        Without a `component`, the method is applied recursively to every
        table of the dataset.

        :param component: A Table object or `None`.
        """
        if not component:
            # No specific table given: apply to all tables in turn.
            for table in self.tables:
                self.auto_constraints(table)
            return

        if not component.tableSchema.primaryKey:
            # Promote the column carrying the CLDF "id" property to primary key.
            idcol = component.get_column(term_uri('id'))
            if idcol:
                component.tableSchema.primaryKey = [idcol.name]

        self._auto_foreign_keys(component)

        try:
            table_type = self.get_tabletype(component)
        except ValueError:
            # New component is not a known CLDF term, so cannot add components
            # automatically. TODO: We might be able to infer some based on
            # `xxxReference` column properties?
            return

        # auto-add foreign keys targeting the new component:
        for table in self.tables:
            self._auto_foreign_keys(table, component=component, table_type=table_type)
コード例 #7
0
ファイル: dataset.py プロジェクト: SimonGreenhill/pycldf
    def __getitem__(self, item):
        """
        Access to tables and columns.

        If a pair (table-spec, column-spec) is passed as `item`, a Column will be
        returned, otherwise `item` is assumed to be a table-spec.

        A table-spec may be
        - a CLDF ontology URI matching the dc:conformsTo property of a table
        - the local name of a CLDF ontology URI, where the complete URI matches the
          dc:conformsTo property of a table
        - a filename matching the `url` property of a table

        A column-spec may be
        - a CLDF ontology URI matching the propertyUrl of a column
        - the local name of a CLDF ontology URI, where the complete URI matches the
          propertyUrl of a column
        - the name of a column

        :raises KeyError: if no matching table or column is found.
        """
        if isinstance(item, tuple):
            table_spec, col_spec = item
        else:
            table_spec, col_spec = item, None

        if isinstance(table_spec, Table):
            matched = table_spec
        else:
            # Resolve the spec against the CLDF ontology, then search for a
            # table matching either the URI or the file name.
            uri = term_uri(table_spec, terms=TERMS.by_uri)
            matched = None
            for candidate in self.tables:
                if (uri and candidate.common_props.get('dc:conformsTo') == uri) \
                        or candidate.url.string == table_spec:
                    matched = candidate
                    break
            if matched is None:
                raise KeyError(table_spec)

        if not col_spec:
            return matched

        col_uri = term_uri(col_spec, terms=TERMS.by_uri)
        for candidate in matched.tableSchema.columns:
            if (candidate.propertyUrl and candidate.propertyUrl.uri == col_uri) \
                    or candidate.header == col_spec:
                return candidate

        raise KeyError(col_spec)
コード例 #8
0
ファイル: dataset.py プロジェクト: glottobank/pycldf
    def __getitem__(self, item):
        """
        Access to tables and columns.

        If a pair (table-spec, column-spec) is passed as `item`, a Column will be
        returned, otherwise `item` is assumed to be a table-spec.

        A table-spec may be
        - a CLDF ontology URI matching the dc:conformsTo property of a table
        - the local name of a CLDF ontology URI, where the complete URI matches the
          dc:conformsTo property of a table
        - a filename matching the `url` property of a table

        A column-spec may be
        - a CLDF ontology URI matching the propertyUrl of a column
        - the local name of a CLDF ontology URI, where the complete URI matches the
          propertyUrl of a column
        - the name of a column

        :raises KeyError: if no matching table or column is found.
        """
        if isinstance(item, tuple):
            table, column = item
        else:
            table, column = item, None

        if not isinstance(table, Table):
            # Resolve the spec against the CLDF ontology, then look for a
            # table matching either the URI or the file name.
            uri = term_uri(table, terms=TERMS.by_uri)
            for t in self.tables:
                if (uri and t.common_props.get('dc:conformsTo') == uri) \
                        or t.url.string == table:
                    break
            else:
                raise KeyError(table)
        else:
            t = table

        if not column:
            return t

        # Columns match either by property URL or by plain column name.
        uri = term_uri(column, terms=TERMS.by_uri)
        for c in t.tableSchema.columns:
            if (c.propertyUrl and c.propertyUrl.uri == uri) or c.header == column:
                return c

        raise KeyError(column)
コード例 #9
0
ファイル: test_dataset.py プロジェクト: Anaphory/pycldf
def test_add_foreign_key(ds):
    """Foreign keys added via `add_foreign_key` are enforced by `validate`."""
    ds.add_table('primary.csv', term_uri('id'), 'col1')
    ds.add_table('foreign.csv', 'fk_id')

    def rows():
        # Fresh row data for each write; 'xy' has no match in primary.csv.
        return {'primary.csv': [{'ID': 'ab'}], 'foreign.csv': [{'fk_id': 'xy'}]}

    ds.write(**rows())
    ds.validate()

    ds.add_foreign_key('foreign.csv', 'fk_id', 'primary.csv')
    ds.write(**rows())
    with pytest.raises(ValueError):
        ds.validate()

    ds.add_foreign_key('foreign.csv', 'fk_id', 'primary.csv', 'ID')
    ds.write(**rows())
    with pytest.raises(ValueError):
        ds.validate()
コード例 #10
0
ファイル: dataset.py プロジェクト: glottobank/pycldf
 def match(self, thing):
     """
     Report whether `thing` corresponds to this term.

     A `TableGroup` matches when its `dc:conformsTo` property equals this
     term's URI; any other object matches when it has a `name` attribute
     equal to `self.fname`.
     """
     if isinstance(thing, TableGroup):
         return thing.common_props.get('dc:conformsTo') == term_uri(self.id)
     # Guard clause: objects without a `name` attribute never match.
     if not hasattr(thing, 'name'):
         return False
     return thing.name == self.fname
コード例 #11
0
ファイル: db.py プロジェクト: Anaphory/pycldf
def schema(ds):
    """
    Convert the table and column descriptions of a `Dataset` into specifications for the
    DB schema.

    Tables whose type is not a known CLDF component keep their file name
    (`url` string) as SQL table name. Columns carrying the CLDF "source"
    property are replaced by association tables linking the object table to
    `SourceTable`. Table specs are returned in creation order (referenced
    tables first).

    :param ds:
    :return: A pair (tables, reference_tables).
    """
    tables, ref_tables = {}, {}
    table_lookup = {t.url.string: t for t in ds.tables}
    for table in table_lookup.values():
        try:
            sql_table_name = ds.get_tabletype(table)
        except ValueError:
            # SQLite is very permissive with table names.
            sql_table_name = table.url.string
        spec = TableSpec(sql_table_name)
        # The column carrying the CLDF "id" property becomes the primary key.
        spec.primary_key = [
            c for c in table.tableSchema.columns
            if c.propertyUrl and c.propertyUrl.uri == term_uri('id')
        ][0].name
        for c in table.tableSchema.columns:
            if c.propertyUrl and c.propertyUrl.uri == term_uri('source'):
                # A column referencing sources is replaced by an association table.
                otype = sql_table_name.replace('Table', '')
                ocolumn = ColSpec(otype + '_ID')
                scolumn = ColSpec('Source_ID')
                ref_tables[sql_table_name] = TableSpec(
                    '{0}Source'.format(otype),
                    [ocolumn, scolumn, ColSpec('Context')],
                    [
                        ([ocolumn], sql_table_name, [ColSpec("id")]),
                        # TODO: This is a cheat: We suppose, as per the CLDF
                        # standard, that there *is* a primary key for a table
                        # like this, and that it is given the `#id` property it
                        # should have.
                        ([scolumn], 'SourceTable', [ColSpec("ID")]),
                        # TODO: This should actually either use the proper
                        # specs of SourceTable and its ID column, or be
                        # separated out.
                        # NOTE(review): casing differs between ColSpec("id")
                        # above and ColSpec("ID") here — confirm intentional.
                    ],
                    c.name)
            else:
                spec.columns.append(
                    ColSpec(c.header,
                            c.datatype.base if c.datatype else c.datatype,
                            c.separator, c.header == spec.primary_key))
                # Standard CLDF properties get a SQL column name derived from
                # the local part (after '#') of the property URI.
                if c.propertyUrl and term_uri(c.propertyUrl.uri, TERMS.by_uri):
                    spec.columns[-1]._sql_name = term_uri(
                        c.propertyUrl.uri, TERMS.by_uri).rsplit("#")[1]
        for fk in table.tableSchema.foreignKeys:
            if fk.reference.schemaReference:  # pragma: no cover
                # We only support Foreign Key references between tables!
                continue
            try:
                ref = ds.get_tabletype(
                    table_lookup[fk.reference.resource.string])
            except ValueError:
                # Referenced table is not a CLDF component; keep its file name.
                ref = fk.reference.resource.string
            # Resolve the referencing column names to their ColSpec objects;
            # target columns stay as names for now (resolved below).
            columns = [[s for s in spec.columns if s.name == c][0]
                       for c in fk.columnReference]
            foreign_columns = fk.reference.columnReference
            spec.foreign_keys.append((columns, ref, foreign_columns))
        tables[spec.name] = spec

    # must determine the order in which tables must be created!
    ordered = OrderedDict()
    i = 0
    #
    # We loop through the tables repeatedly, and whenever we find one, which has all
    # referenced tables already in ordered, we move it from tables to ordered.
    #
    while tables and i < 100:
        i += 1
        for table in list(tables.keys()):
            if all(ref[1] in ordered for ref in tables[table].foreign_keys):
                # All referenced tables are already created.
                ordered[table] = tables.pop(table)
                break
    if tables:  # pragma: no cover
        raise ValueError(
            'there seem to be cyclic dependencies between the tables')

    # Now that all table specs exist, resolve foreign-key target column names
    # to their ColSpec objects.
    for spec in ordered.values():
        for i in range(len(spec.foreign_keys)):
            columns, ref, foreign_columns = spec.foreign_keys[i]
            try:
                foreign_columns = [[
                    s for s in ordered[ref].columns if s.name == c
                ][0] for c in foreign_columns]
            except IndexError:
                raise ValueError(
                    "Foreign key of table {:} points to columns {:}({:}), which does not exist."
                    .format(spec.name, ref, ", ".join(foreign_columns)))
            spec.foreign_keys[i] = columns, ref, foreign_columns

    return list(ordered.values()), ref_tables
コード例 #12
0
def test_make_column():
    """`make_column` handles term URIs and Column instances, rejects ints."""
    source_column = make_column(term_uri('source'))
    assert source_column.separator == ';'
    plain_column = make_column(Column('name'))
    assert plain_column.datatype is None
    with pytest.raises(TypeError):
        make_column(5)
コード例 #13
0
def ds_wl(tmpdir):
    # Wordlist dataset located in a fresh temp directory.
    # NOTE(review): presumably decorated with @pytest.fixture — the decorator
    # is not visible in this excerpt; confirm against the original file.
    return Wordlist.in_dir(str(tmpdir))


@pytest.fixture
def ds_wl_notables(tmpdir):
    # Wordlist fixture created with `empty_tables=True` — presumably this
    # suppresses the default component tables; confirm against Wordlist.in_dir.
    return Wordlist.in_dir(str(tmpdir), empty_tables=True)


@pytest.mark.parametrize("col_spec,datatype", [
    ('name', 'string'),
    ({'name': 'num', 'datatype': 'decimal'}, 'decimal'),
    (term_uri('latitude'), 'decimal'),
])
def test_column_basetype(col_spec, datatype):
    """Column specs (name, dict, or term URI) yield the expected base datatype."""
    column = make_column(col_spec)
    assert column.datatype.base == datatype


def test_make_column():
    # A column for the CLDF "source" property defaults to ';' as separator.
    assert make_column(term_uri('source')).separator == ';'
    # A plain Column comes back without a datatype.
    assert make_column(Column('name')).datatype is None
    # Unsupported spec types (here: int) are rejected.
    with pytest.raises(TypeError):
        make_column(5)


def test_column_names(ds_wl):
    """A fresh wordlist exposes no `values` entry among its column names."""
    assert ds_wl.column_names.values is None
コード例 #14
0
ファイル: test_dataset.py プロジェクト: glottobank/pycldf
def test_make_column():
    # A column for the CLDF "source" property defaults to ';' as separator.
    assert make_column(term_uri('source')).separator == ';'
    # A plain Column comes back without a datatype.
    assert make_column(Column('name')).datatype is None
    # Unsupported spec types (here: int) are rejected.
    with pytest.raises(TypeError):
        make_column(5)
コード例 #15
0
 def match(self, thing):
     # A TableGroup matches when its dc:conformsTo equals this term's URI.
     if isinstance(thing, TableGroup):
         return thing.common_props.get('dc:conformsTo') == term_uri(self.id)
     # Anything else matches by file name, if it has one.
     if hasattr(thing, 'name'):
         return thing.name == self.fname
     return False
コード例 #16
0
def schema(ds):
    """
    Convert the table and column descriptions of a `Dataset` into specifications for the
    DB schema.

    Only tables recognized as CLDF components are included. Columns of
    standard components are renamed to their canonical names via
    `PROPERTY_URL_TO_COL`, foreign keys are prefixed with `dataset_ID`, and
    columns carrying the CLDF "source" property are split out into
    association tables. Table specs are returned in creation order
    (referenced tables first).

    :param ds:
    :return: A pair (tables, reference_tables).
    """
    tables, ref_tables = {}, {}
    # Skip tables whose type is not a known CLDF component.
    table_lookup = {t.url.string: t for t in ds.tables if ds.get_tabletype(t)}
    for table in table_lookup.values():
        spec = TableSpec(ds.get_tabletype(table))
        # The column carrying the CLDF "id" property becomes the primary key.
        spec.primary_key = [
            c for c in table.tableSchema.columns if
            c.propertyUrl and c.propertyUrl.uri == term_uri('id')][0].name
        # Map the column name to the default:
        if spec.name in PROPERTY_URL_TO_COL:
            spec.primary_key = PROPERTY_URL_TO_COL[spec.name][term_uri('id')]
        for c in table.tableSchema.columns:
            if c.propertyUrl and c.propertyUrl.uri == term_uri('source'):
                # A column referencing sources is replaced by an association table.
                otype = ds.get_tabletype(table).replace('Table', '')
                ref_tables[ds.get_tabletype(table)] = TableSpec(
                    '{0}Source'.format(otype),  # The name of the association table.
                    [ColSpec(otype + '_ID'), ColSpec('Source_ID'), ColSpec('Context')],
                    [
                        (  # The foreign key to the referencing object:
                            ['dataset_ID', otype + '_ID'],
                            ds.get_tabletype(table),
                            ['dataset_ID', spec.primary_key]),
                        (  # The foreign key to the referenced source:
                            ['dataset_ID', 'Source_ID'],
                            'SourceTable',
                            ['dataset_ID', 'ID']),
                    ],
                    c.name)
            else:
                cname = c.header
                # Standard CLDF properties are renamed to their canonical
                # column names.
                if c.propertyUrl and spec.name in PROPERTY_URL_TO_COL:
                    if c.propertyUrl.uri in PROPERTY_URL_TO_COL[spec.name]:
                        cname = PROPERTY_URL_TO_COL[spec.name][c.propertyUrl.uri]
                spec.columns.append(ColSpec(
                    cname,
                    c.datatype.base if c.datatype else c.datatype,
                    c.separator,
                    cname == spec.primary_key,
                    cldf_name=c.header))
        # Columns with a separator hold lists and cannot be SQL foreign keys.
        listvalued = set(c.name for c in table.tableSchema.columns if c.separator)
        for fk in table.tableSchema.foreignKeys:
            if fk.reference.schemaReference:
                # We only support Foreign Key references between tables!
                continue  # pragma: no cover
            ref = table_lookup[fk.reference.resource.string]
            ref_type = ds.get_tabletype(ref)
            if ref_type:
                colRefs = sorted(fk.columnReference)
                if any(c in listvalued for c in colRefs):
                    # We drop list-valued foreign keys
                    continue  # pragma: no cover
                if spec.name in PROPERTY_URL_TO_COL:  # Current table is a standard CLDF component.
                    # Must map foreign keys
                    colRefs = []
                    for c in sorted(fk.columnReference):
                        c = ds[spec.name, c]
                        if c.propertyUrl and c.propertyUrl.uri in PROPERTY_URL_TO_COL[spec.name]:
                            colRefs.append(PROPERTY_URL_TO_COL[spec.name][c.propertyUrl.uri])
                        else:
                            colRefs.append(c.header)  # pragma: no cover
                rcolRefs = sorted(fk.reference.columnReference)
                if ref_type in PROPERTY_URL_TO_COL:
                    # Must map foreign key targets!
                    rcolRefs = []
                    for c in sorted(fk.reference.columnReference):
                        c = ds[ref_type, c]
                        if c.propertyUrl and c.propertyUrl.uri in PROPERTY_URL_TO_COL[ref_type]:
                            rcolRefs.append(PROPERTY_URL_TO_COL[ref_type][c.propertyUrl.uri])
                        else:
                            rcolRefs.append(c.header)  # pragma: no cover
                spec.foreign_keys.append((
                    tuple(['dataset_ID'] + colRefs),
                    ds.get_tabletype(table_lookup[fk.reference.resource.string]),
                    tuple(['dataset_ID'] + rcolRefs)))
        tables[spec.name] = spec

    # must determine the order in which tables must be created!
    ordered = collections.OrderedDict()
    i = 0
    #
    # We loop through the tables repeatedly, and whenever we find one, which has all
    # referenced tables already in ordered, we move it from tables to ordered.
    #
    while tables and i < 100:
        i += 1
        for table in list(tables.keys()):
            if all(ref[1] in ordered for ref in tables[table].foreign_keys):
                # All referenced tables are already created.
                ordered[table] = tables.pop(table)
                break
    if tables:  # pragma: no cover
        raise ValueError('there seem to be cyclic dependencies between the tables')

    return list(ordered.values()), ref_tables
コード例 #17
0
ファイル: db.py プロジェクト: afcarl/pycldf
def schema(ds):
    """
    Convert the table and column descriptions of a `Dataset` into specifications for the
    DB schema.

    Tables whose type is not a known CLDF component keep their file name
    (`url` string) as SQL table name; columns carrying the CLDF "source"
    property are replaced by association tables linking the object table to
    `SourceTable`. Table specs are returned in creation order (referenced
    tables first).

    :param ds:
    :return: A pair (tables, reference_tables).
    """
    tables, ref_tables = {}, {}
    table_lookup = {t.url.string: t for t in ds.tables}
    for table in table_lookup.values():
        try:
            sql_table_name = ds.get_tabletype(table)
        except ValueError:
            # SQLite is very permissive with table names.
            sql_table_name = table.url.string
        spec = TableSpec(sql_table_name)
        # The column carrying the CLDF "id" property becomes the primary key.
        spec.primary_key = [
            c for c in table.tableSchema.columns
            if c.propertyUrl and c.propertyUrl.uri == term_uri('id')
        ][0].name
        for c in table.tableSchema.columns:
            if c.propertyUrl and c.propertyUrl.uri == term_uri('source'):
                # A column referencing sources is replaced by an association table.
                otype = sql_table_name.replace('Table', '')
                ref_tables[sql_table_name] = TableSpec(
                    '{0}Source'.format(otype), [
                        ColSpec(otype + '_ID'),
                        ColSpec('Source_ID'),
                        ColSpec('Context')
                    ], [
                        ([otype + '_ID'], sql_table_name, [spec.primary_key]),
                        (['Source_ID'], 'SourceTable', ['ID']),
                    ], c.name)
            else:
                spec.columns.append(
                    ColSpec(c.header,
                            c.datatype.base if c.datatype else c.datatype,
                            c.separator, c.header == spec.primary_key))
        for fk in table.tableSchema.foreignKeys:
            if fk.reference.schemaReference:  # pragma: no cover
                # We only support Foreign Key references between tables!
                continue
            try:
                ref = ds.get_tabletype(
                    table_lookup[fk.reference.resource.string])
            except ValueError:
                # Referenced table is not a CLDF component; keep its file name.
                ref = fk.reference.resource.string
            spec.foreign_keys.append(
                (tuple(sorted(fk.columnReference)), ref,
                 tuple(sorted(fk.reference.columnReference))))
        tables[spec.name] = spec

    # must determine the order in which tables must be created!
    ordered = OrderedDict()
    i = 0
    #
    # We loop through the tables repeatedly, and whenever we find one, which has all
    # referenced tables already in ordered, we move it from tables to ordered.
    #
    while tables and i < 100:
        i += 1
        for table in list(tables.keys()):
            if all(ref[1] in ordered for ref in tables[table].foreign_keys):
                # All referenced tables are already created.
                ordered[table] = tables.pop(table)
                break
    if tables:  # pragma: no cover
        raise ValueError(
            'there seem to be cyclic dependencies between the tables')

    return list(ordered.values()), ref_tables
コード例 #18
0
ファイル: test_dataset.py プロジェクト: glottobank/pycldf

@pytest.fixture
def ds_wl(tmpdir):
    # Wordlist dataset located in a fresh temp directory.
    return Wordlist.in_dir(str(tmpdir))


@pytest.fixture
def ds_wl_notables(tmpdir):
    # Wordlist fixture created with `empty_tables=True` — presumably this
    # suppresses the default component tables; confirm against Wordlist.in_dir.
    return Wordlist.in_dir(str(tmpdir), empty_tables=True)


@pytest.mark.parametrize("col_spec,datatype", [
    ('name', 'string'),
    ({'name': 'num', 'datatype': 'decimal'}, 'decimal'),
    (term_uri('latitude'), 'decimal'),
])
def test_column_basetype(col_spec, datatype):
    # Column specs may be plain names, dicts, or CLDF term URIs; each yields
    # a column with the expected base datatype.
    assert make_column(col_spec).datatype.base == datatype


def test_make_column():
    # A column for the CLDF "source" property defaults to ';' as separator.
    assert make_column(term_uri('source')).separator == ';'
    # A plain Column comes back without a datatype.
    assert make_column(Column('name')).datatype is None
    # Unsupported spec types (here: int) are rejected.
    with pytest.raises(TypeError):
        make_column(5)


def test_primary_table(ds):
    # A generic dataset fixture has no designated primary table.
    assert ds.primary_table is None