def test_add_table(tmpdir, ds):
    ds.add_table('stuff.csv', term_uri('id'), 'col1')
    ds.write(fname=str(tmpdir / 't.json'), **{'stuff.csv': [{'ID': '.ab'}]})
    with pytest.raises(ValueError):
        ds.validate()
    ds['stuff.csv', 'ID'].name = 'nid'
    with pytest.raises(ValueError):
        ds.add_columns('stuff.csv', term_uri('id'))
    with pytest.raises(ValueError):
        ds.add_columns('stuff.csv', 'col1')
def _auto_foreign_keys(self, table, component=None, table_type=None):
    assert (component is None) == (table_type is None)
    for col in table.tableSchema.columns:
        if col.propertyUrl and col.propertyUrl.uri in TERMS.by_uri:
            ref_name = TERMS.by_uri[col.propertyUrl.uri].references
            if (component is None and not ref_name) or \
                    (component is not None and ref_name != table_type):
                continue

            if any(fkey.columnReference == [col.name]
                   for fkey in table.tableSchema.foreignKeys):
                continue

            if component is None:
                # Let's see whether we have the component this column references:
                try:
                    ref = self[ref_name]
                except KeyError:
                    continue
            else:
                ref = component

            idcol = ref.get_column(term_uri('id'))
            table.tableSchema.foreignKeys.append(ForeignKey.fromdict(dict(
                columnReference=col.name,
                reference=dict(
                    resource=ref.url.string,
                    columnReference=idcol.name if idcol is not None else 'ID'))))
def auto_constraints(self, component=None):
    """
    Use CLDF reference properties to implicitly create foreign key constraints.

    :param component: A Table object or `None`.
    """
    if not component:
        for table in self.tables:
            self.auto_constraints(table)
        return

    if not component.tableSchema.primaryKey:
        idcol = component.get_column(term_uri('id'))
        if idcol:
            component.tableSchema.primaryKey = [idcol.name]

    self._auto_foreign_keys(component)

    try:
        table_type = self.get_tabletype(component)
    except ValueError:
        # New component is not a known CLDF term, so cannot add components
        # automatically. TODO: We might be able to infer some based on
        # `xxxReference` column properties?
        return

    # auto-add foreign keys targeting the new component:
    for table in self.tables:
        self._auto_foreign_keys(table, component=component, table_type=table_type)
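# Usage sketch (not part of the library code above): a minimal, hedged
# illustration of `auto_constraints`, assuming the same helpers and fixtures as
# the tests in this repo (`Wordlist.in_dir`, `term_uri`, `add_table`). The
# table name 'custom.csv' and the test name are illustrative only, and
# `add_table` may already run `auto_constraints` itself, so the explicit call
# below only makes the effect visible.
def test_auto_constraints_sketch(tmpdir):
    ds = Wordlist.in_dir(str(tmpdir))
    ds.add_table('custom.csv', term_uri('id'), 'col1')
    ds.auto_constraints(ds['custom.csv'])
    # The column carrying the CLDF #id property is promoted to primary key:
    assert ds['custom.csv'].tableSchema.primaryKey == ['ID']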
def __getitem__(self, item):
    """
    Access to tables and columns.

    If a pair (table-spec, column-spec) is passed as `item`, a Column will be
    returned, otherwise `item` is assumed to be a table-spec.

    A table-spec may be
    - a CLDF ontology URI matching the dc:conformsTo property of a table
    - the local name of a CLDF ontology URI, where the complete URI matches the
      dc:conformsTo property of a table
    - a filename matching the `url` property of a table

    A column-spec may be
    - a CLDF ontology URI matching the propertyUrl of a column
    - the local name of a CLDF ontology URI, where the complete URI matches the
      propertyUrl of a column
    - the name of a column
    """
    if isinstance(item, tuple):
        table, column = item
    else:
        table, column = item, None

    if not isinstance(table, Table):
        uri = term_uri(table, terms=TERMS.by_uri)
        for t in self.tables:
            if (uri and t.common_props.get('dc:conformsTo') == uri) \
                    or t.url.string == table:
                break
        else:
            raise KeyError(table)
    else:
        t = table

    if not column:
        return t

    uri = term_uri(column, terms=TERMS.by_uri)
    for c in t.tableSchema.columns:
        if (c.propertyUrl and c.propertyUrl.uri == uri) or c.header == column:
            return c
    raise KeyError(column)
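# Usage sketch for the lookup conventions documented in `__getitem__` above.
# Hedged: it assumes the `tmpdir`/`ds` fixtures behave as in the surrounding
# tests; 'stuff.csv' and the test name are illustrative only.
def test_getitem_sketch(tmpdir, ds):
    ds.add_table('stuff.csv', term_uri('id'), 'col1')
    # table-spec: filename matching the table's `url` property
    assert ds['stuff.csv'].url.string == 'stuff.csv'
    # column-spec: column name, local term name, and full term URI all resolve
    # to the same Column object
    assert ds['stuff.csv', 'ID'] is ds['stuff.csv', 'id']
    assert ds['stuff.csv', 'id'] is ds['stuff.csv', term_uri('id')]
    # unknown specs raise KeyError
    with pytest.raises(KeyError):
        ds['unknown.csv']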
def test_add_foreign_key(ds):
    ds.add_table('primary.csv', term_uri('id'), 'col1')
    ds.add_table('foreign.csv', 'fk_id')
    ds.write(**{'primary.csv': [{'ID': 'ab'}], 'foreign.csv': [{'fk_id': 'xy'}]})
    ds.validate()

    ds.add_foreign_key('foreign.csv', 'fk_id', 'primary.csv')
    ds.write(**{'primary.csv': [{'ID': 'ab'}], 'foreign.csv': [{'fk_id': 'xy'}]})
    with pytest.raises(ValueError):
        ds.validate()

    ds.add_foreign_key('foreign.csv', 'fk_id', 'primary.csv', 'ID')
    ds.write(**{'primary.csv': [{'ID': 'ab'}], 'foreign.csv': [{'fk_id': 'xy'}]})
    with pytest.raises(ValueError):
        ds.validate()
def match(self, thing):
    if isinstance(thing, TableGroup):
        return thing.common_props.get('dc:conformsTo') == term_uri(self.id)
    if hasattr(thing, 'name'):
        return thing.name == self.fname
    return False
def schema(ds):
    """
    Convert the table and column descriptions of a `Dataset` into specifications for the
    DB schema.

    :param ds:
    :return: A pair (tables, reference_tables).
    """
    tables, ref_tables = {}, {}
    table_lookup = {t.url.string: t for t in ds.tables}
    for table in table_lookup.values():
        try:
            sql_table_name = ds.get_tabletype(table)
        except ValueError:
            # SQLite is very permissive with table names.
            sql_table_name = table.url.string
        spec = TableSpec(sql_table_name)
        spec.primary_key = [
            c for c in table.tableSchema.columns
            if c.propertyUrl and c.propertyUrl.uri == term_uri('id')][0].name
        for c in table.tableSchema.columns:
            if c.propertyUrl and c.propertyUrl.uri == term_uri('source'):
                # A column referencing sources is replaced by an association table.
                otype = sql_table_name.replace('Table', '')
                ocolumn = ColSpec(otype + '_ID')
                scolumn = ColSpec('Source_ID')
                ref_tables[sql_table_name] = TableSpec(
                    '{0}Source'.format(otype),
                    [ocolumn, scolumn, ColSpec('Context')],
                    [
                        ([ocolumn], sql_table_name, [ColSpec("id")]),
                        # TODO: This is a cheat: We suppose, as per the CLDF
                        # standard, that there *is* a primary key for a table
                        # like this, and that it is given the `#id` property it
                        # should have.
                        ([scolumn], 'SourceTable', [ColSpec("ID")]),
                        # TODO: This should actually either use the proper
                        # specs of SourceTable and its ID column, or be
                        # separated out.
                    ],
                    c.name)
            else:
                spec.columns.append(ColSpec(
                    c.header,
                    c.datatype.base if c.datatype else c.datatype,
                    c.separator,
                    c.header == spec.primary_key))
                if c.propertyUrl and term_uri(c.propertyUrl.uri, TERMS.by_uri):
                    spec.columns[-1]._sql_name = term_uri(
                        c.propertyUrl.uri, TERMS.by_uri).rsplit("#")[1]
        for fk in table.tableSchema.foreignKeys:
            if fk.reference.schemaReference:  # pragma: no cover
                # We only support Foreign Key references between tables!
                continue
            try:
                ref = ds.get_tabletype(table_lookup[fk.reference.resource.string])
            except ValueError:
                ref = fk.reference.resource.string
            columns = [[s for s in spec.columns if s.name == c][0]
                       for c in fk.columnReference]
            foreign_columns = fk.reference.columnReference
            spec.foreign_keys.append((columns, ref, foreign_columns))
        tables[spec.name] = spec

    # must determine the order in which tables must be created!
    ordered = OrderedDict()
    i = 0
    #
    # We loop through the tables repeatedly, and whenever we find one, which has all
    # referenced tables already in ordered, we move it from tables to ordered.
    #
    while tables and i < 100:
        i += 1
        for table in list(tables.keys()):
            if all(ref[1] in ordered for ref in tables[table].foreign_keys):
                # All referenced tables are already created.
                ordered[table] = tables.pop(table)
                break

    if tables:  # pragma: no cover
        raise ValueError('there seem to be cyclic dependencies between the tables')

    for spec in ordered.values():
        for i in range(len(spec.foreign_keys)):
            columns, ref, foreign_columns = spec.foreign_keys[i]
            try:
                foreign_columns = [
                    [s for s in ordered[ref].columns if s.name == c][0]
                    for c in foreign_columns]
            except IndexError:
                raise ValueError(
                    "Foreign key of table {:} points to columns {:}({:}), which does not exist."
                    .format(spec.name, ref, ", ".join(foreign_columns)))
            spec.foreign_keys[i] = columns, ref, foreign_columns

    return list(ordered.values()), ref_tables
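# Usage sketch for `schema` above. `describe_schema` is a hypothetical helper,
# and obtaining `ds` via `Dataset.from_metadata` is an assumption about how the
# dataset is read; the TableSpec/ColSpec attributes used here (`name`,
# `columns`, `foreign_keys`) are the ones referenced in the function above.
def describe_schema(ds):
    tables, ref_tables = schema(ds)
    for spec in tables:
        # spec.name is the SQL table name: a CLDF component name where the
        # table type is known, otherwise the table's filename.
        print(spec.name, [c.name for c in spec.columns], len(spec.foreign_keys))
    for component, assoc in ref_tables.items():
        # Association tables replace columns referencing sources.
        print('{0} -> {1}'.format(component, assoc.name))
    return tables, ref_tables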
@pytest.fixture
def ds_wl(tmpdir):
    return Wordlist.in_dir(str(tmpdir))


@pytest.fixture
def ds_wl_notables(tmpdir):
    return Wordlist.in_dir(str(tmpdir), empty_tables=True)


@pytest.mark.parametrize("col_spec,datatype", [
    ('name', 'string'),
    ({'name': 'num', 'datatype': 'decimal'}, 'decimal'),
    (term_uri('latitude'), 'decimal'),
])
def test_column_basetype(col_spec, datatype):
    assert make_column(col_spec).datatype.base == datatype


def test_make_column():
    assert make_column(term_uri('source')).separator == ';'
    assert make_column(Column('name')).datatype is None
    with pytest.raises(TypeError):
        make_column(5)


def test_column_names(ds_wl):
    cn = ds_wl.column_names
    assert cn.values is None
def schema(ds):
    """
    Convert the table and column descriptions of a `Dataset` into specifications for the
    DB schema.

    :param ds:
    :return: A pair (tables, reference_tables).
    """
    tables, ref_tables = {}, {}
    table_lookup = {t.url.string: t for t in ds.tables if ds.get_tabletype(t)}
    for table in table_lookup.values():
        spec = TableSpec(ds.get_tabletype(table))
        spec.primary_key = [
            c for c in table.tableSchema.columns
            if c.propertyUrl and c.propertyUrl.uri == term_uri('id')][0].name
        # Map the column name to the default:
        if spec.name in PROPERTY_URL_TO_COL:
            spec.primary_key = PROPERTY_URL_TO_COL[spec.name][term_uri('id')]
        for c in table.tableSchema.columns:
            if c.propertyUrl and c.propertyUrl.uri == term_uri('source'):
                # A column referencing sources is replaced by an association table.
                otype = ds.get_tabletype(table).replace('Table', '')
                ref_tables[ds.get_tabletype(table)] = TableSpec(
                    '{0}Source'.format(otype),  # The name of the association table.
                    [ColSpec(otype + '_ID'), ColSpec('Source_ID'), ColSpec('Context')],
                    [
                        (  # The foreign key to the referencing object:
                            ['dataset_ID', otype + '_ID'],
                            ds.get_tabletype(table),
                            ['dataset_ID', spec.primary_key]),
                        (  # The foreign key to the referenced source:
                            ['dataset_ID', 'Source_ID'],
                            'SourceTable',
                            ['dataset_ID', 'ID']),
                    ],
                    c.name)
            else:
                cname = c.header
                if c.propertyUrl and spec.name in PROPERTY_URL_TO_COL:
                    if c.propertyUrl.uri in PROPERTY_URL_TO_COL[spec.name]:
                        cname = PROPERTY_URL_TO_COL[spec.name][c.propertyUrl.uri]
                spec.columns.append(ColSpec(
                    cname,
                    c.datatype.base if c.datatype else c.datatype,
                    c.separator,
                    cname == spec.primary_key,
                    cldf_name=c.header))
        listvalued = set(c.name for c in table.tableSchema.columns if c.separator)
        for fk in table.tableSchema.foreignKeys:
            if fk.reference.schemaReference:
                # We only support Foreign Key references between tables!
                continue  # pragma: no cover
            ref = table_lookup[fk.reference.resource.string]
            ref_type = ds.get_tabletype(ref)
            if ref_type:
                colRefs = sorted(fk.columnReference)
                if any(c in listvalued for c in colRefs):
                    # We drop list-valued foreign keys
                    continue  # pragma: no cover
                if spec.name in PROPERTY_URL_TO_COL:
                    # Current table is a standard CLDF component.
                    # Must map foreign keys
                    colRefs = []
                    for c in sorted(fk.columnReference):
                        c = ds[spec.name, c]
                        if c.propertyUrl and c.propertyUrl.uri in PROPERTY_URL_TO_COL[spec.name]:
                            colRefs.append(PROPERTY_URL_TO_COL[spec.name][c.propertyUrl.uri])
                        else:
                            colRefs.append(c.header)  # pragma: no cover
                rcolRefs = sorted(fk.reference.columnReference)
                if ref_type in PROPERTY_URL_TO_COL:
                    # Must map foreign key targets!
                    rcolRefs = []
                    for c in sorted(fk.reference.columnReference):
                        c = ds[ref_type, c]
                        if c.propertyUrl and c.propertyUrl.uri in PROPERTY_URL_TO_COL[ref_type]:
                            rcolRefs.append(PROPERTY_URL_TO_COL[ref_type][c.propertyUrl.uri])
                        else:
                            rcolRefs.append(c.header)  # pragma: no cover
                spec.foreign_keys.append((
                    tuple(['dataset_ID'] + colRefs),
                    ds.get_tabletype(table_lookup[fk.reference.resource.string]),
                    tuple(['dataset_ID'] + rcolRefs)))
        tables[spec.name] = spec

    # must determine the order in which tables must be created!
    ordered = collections.OrderedDict()
    i = 0
    #
    # We loop through the tables repeatedly, and whenever we find one, which has all
    # referenced tables already in ordered, we move it from tables to ordered.
    #
    while tables and i < 100:
        i += 1
        for table in list(tables.keys()):
            if all(ref[1] in ordered for ref in tables[table].foreign_keys):
                # All referenced tables are already created.
                ordered[table] = tables.pop(table)
                break

    if tables:  # pragma: no cover
        raise ValueError('there seem to be cyclic dependencies between the tables')

    return list(ordered.values()), ref_tables
def schema(ds):
    """
    Convert the table and column descriptions of a `Dataset` into specifications for the
    DB schema.

    :param ds:
    :return: A pair (tables, reference_tables).
    """
    tables, ref_tables = {}, {}
    table_lookup = {t.url.string: t for t in ds.tables}
    for table in table_lookup.values():
        try:
            sql_table_name = ds.get_tabletype(table)
        except ValueError:
            # SQLite is very permissive with table names.
            sql_table_name = table.url.string
        spec = TableSpec(sql_table_name)
        spec.primary_key = [
            c for c in table.tableSchema.columns
            if c.propertyUrl and c.propertyUrl.uri == term_uri('id')][0].name
        for c in table.tableSchema.columns:
            if c.propertyUrl and c.propertyUrl.uri == term_uri('source'):
                # A column referencing sources is replaced by an association table.
                otype = sql_table_name.replace('Table', '')
                ref_tables[sql_table_name] = TableSpec(
                    '{0}Source'.format(otype),
                    [ColSpec(otype + '_ID'), ColSpec('Source_ID'), ColSpec('Context')],
                    [
                        ([otype + '_ID'], sql_table_name, [spec.primary_key]),
                        (['Source_ID'], 'SourceTable', ['ID']),
                    ],
                    c.name)
            else:
                spec.columns.append(ColSpec(
                    c.header,
                    c.datatype.base if c.datatype else c.datatype,
                    c.separator,
                    c.header == spec.primary_key))
        for fk in table.tableSchema.foreignKeys:
            if fk.reference.schemaReference:  # pragma: no cover
                # We only support Foreign Key references between tables!
                continue
            try:
                ref = ds.get_tabletype(table_lookup[fk.reference.resource.string])
            except ValueError:
                ref = fk.reference.resource.string
            spec.foreign_keys.append((
                tuple(sorted(fk.columnReference)),
                ref,
                tuple(sorted(fk.reference.columnReference))))
        tables[spec.name] = spec

    # must determine the order in which tables must be created!
    ordered = OrderedDict()
    i = 0
    #
    # We loop through the tables repeatedly, and whenever we find one, which has all
    # referenced tables already in ordered, we move it from tables to ordered.
    #
    while tables and i < 100:
        i += 1
        for table in list(tables.keys()):
            if all(ref[1] in ordered for ref in tables[table].foreign_keys):
                # All referenced tables are already created.
                ordered[table] = tables.pop(table)
                break

    if tables:  # pragma: no cover
        raise ValueError('there seem to be cyclic dependencies between the tables')

    return list(ordered.values()), ref_tables
@pytest.fixture
def ds_wl(tmpdir):
    return Wordlist.in_dir(str(tmpdir))


@pytest.fixture
def ds_wl_notables(tmpdir):
    return Wordlist.in_dir(str(tmpdir), empty_tables=True)


@pytest.mark.parametrize("col_spec,datatype", [
    ('name', 'string'),
    ({'name': 'num', 'datatype': 'decimal'}, 'decimal'),
    (term_uri('latitude'), 'decimal'),
])
def test_column_basetype(col_spec, datatype):
    assert make_column(col_spec).datatype.base == datatype


def test_make_column():
    assert make_column(term_uri('source')).separator == ';'
    assert make_column(Column('name')).datatype is None
    with pytest.raises(TypeError):
        make_column(5)


def test_primary_table(ds):
    assert ds.primary_table is None