コード例 #1
0
ファイル: terms.py プロジェクト: afcarl/pycldf
 def __init__(self):
     r = ElementTree.parse(pkg_path('terms.rdf').as_posix()).getroot()
     terms = [Term.from_element(e) for e in r.findall(qname(RDF, 'Property'))]
     for e in r.findall(qname(RDFS, 'Class')):
         terms.append(Term.from_element(e))
     dict.__init__(self, {t.name: t for t in terms})
     self.by_uri = {t.uri: t for t in terms}
コード例 #2
0
ファイル: dataset.py プロジェクト: SimonGreenhill/pycldf
    def from_metadata(cls, fname):
        fname = pathlib.Path(fname)
        if fname.is_dir():
            name = '{0}{1}'.format(cls.__name__, MD_SUFFIX)
            tablegroup = TableGroup.from_file(pkg_path('modules', name))
            # adapt the path of the metadata file such that paths to tables are resolved
            # correctly:
            tablegroup._fname = fname.joinpath(name)
        else:
            tablegroup = TableGroup.from_file(fname)

        comps = collections.Counter()
        for table in tablegroup.tables:
            try:
                dt = Dataset.get_tabletype(table)
                if dt:
                    comps.update([dt])
            except ValueError:
                pass
        if comps and comps.most_common(1)[0][1] > 1:
            raise ValueError('{0}: duplicate components!'.format(fname))

        for mod in get_modules():
            if mod.match(tablegroup):
                return mod.cls(tablegroup)
        return cls(tablegroup)
コード例 #3
0
ファイル: dataset.py プロジェクト: SimonGreenhill/pycldf
    def add_component(self, component, *cols, **kw):
        if isinstance(component, str):
            component = jsonlib.load(
                pkg_path('components', '{0}{1}'.format(component, MD_SUFFIX)))
        if isinstance(component, dict):
            component = Table.fromvalue(component)
        assert isinstance(component, Table)

        if kw.get('url'):
            component.url = Link(kw['url'])

        for other_table in self.tables:
            if other_table.url == component.url:
                raise ValueError('tables must have distinct url properties')

        self.add_columns(component, *cols)
        try:
            table_type = self.get_tabletype(component)
        except ValueError:
            table_type = None
        if table_type:
            for other_table in self.tables:
                try:
                    other_table_type = self.get_tabletype(other_table)
                except ValueError:  # pragma: no cover
                    continue
                if other_table_type == table_type:
                    raise ValueError('components must not be added twice')

        self.tables.append(component)
        component._parent = self.tablegroup
        self.auto_constraints(component)
        return component
コード例 #4
0
    def from_metadata(cls, fname):
        fname = Path(fname)
        if fname.is_dir():
            name = '{0}{1}'.format(cls.__name__, MD_SUFFIX)
            tablegroup = TableGroup.from_file(pkg_path('modules', name))
            # adapt the path of the metadata file such that paths to tables are resolved
            # correctly:
            tablegroup._fname = fname.joinpath(name)
        else:
            tablegroup = TableGroup.from_file(fname)

        for mod in get_modules():
            if mod.match(tablegroup):
                return mod.cls(tablegroup)
        return cls(tablegroup)
コード例 #5
0
    def __attrs_post_init__(self):
        if self.default_metadata_path:
            self.default_metadata_path = pathlib.Path(
                self.default_metadata_path)
            try:
                Dataset.from_metadata(self.default_metadata_path)
            except Exception:
                raise ValueError('invalid default metadata: {0}'.format(
                    self.default_metadata_path))
        else:
            self.default_metadata_path = pkg_path(
                'modules', '{0}{1}'.format(self.module, MD_SUFFIX))

        if not self.metadata_fname:
            self.metadata_fname = self.default_metadata_path.name
コード例 #6
0
def get_modules():
    global _modules
    if not _modules:
        ds = sys.modules[__name__]
        for p in pkg_path('modules').glob('*{0}'.format(MD_SUFFIX)):
            tg = TableGroup.from_file(p)
            mod = Module(
                tg.common_props['dc:conformsTo'],
                tg.tables[0].url.string if tg.tables else None)
            mod.cls = getattr(ds, mod.id)
            _modules.append(mod)
        # prefer Wordlist over ParallelText (forms.csv)
        _modules = sorted(
            _modules,
            key=lambda m: (m.cls in (Wordlist, ParallelText), m.cls is ParallelText))
    return _modules
コード例 #7
0
ファイル: dataset.py プロジェクト: glottobank/pycldf
def get_modules():
    global _modules
    if not _modules:
        ds = sys.modules[__name__]
        for p in pkg_path('modules').glob('*{0}'.format(MD_SUFFIX)):
            tg = TableGroup.from_file(p)
            mod = Module(
                tg.common_props['dc:conformsTo'],
                tg.tables[0].url.string if tg.tables else None)
            mod.cls = getattr(ds, mod.id)
            _modules.append(mod)
        # prefer Wordlist over ParallelText (forms.csv)
        _modules = sorted(
            _modules,
            key=lambda m: (m.cls in (Wordlist, ParallelText), m.cls is ParallelText))
    return _modules
コード例 #8
0
ファイル: text.py プロジェクト: cldf/cldfviz
    def component(self, cldf=None):
        """
        :param cldf: `pycldf.Dataset` instance to which the link refers.
        :return: Name of the CLDF component the link pertains to or `None`.
        """
        name = self.table_or_fname
        if cldf is None:
            # If no CLDF dataset is passed as context, we can only detect links using proper
            # component names as path:
            return name if name == 'Source' or pkg_path('components', name + MD_SUFFIX).exists() \
                else None

        if name == cldf.bibname or name == 'Source':
            return 'Source'
        try:
            return cldf.get_tabletype(cldf[name])
        except (KeyError, ValueError):
            return None
コード例 #9
0
    def add_component(self, component, *cols):
        if isinstance(component, string_types):
            component = jsonlib.load(
                pkg_path('components', '{0}{1}'.format(component, MD_SUFFIX)))
        if isinstance(component, dict):
            component = Table.fromvalue(component)
        assert isinstance(component, Table)
        self.add_columns(component, *cols)
        try:
            table_type = self.get_tabletype(component)
        except ValueError:
            table_type = None
        for other_table in self.tables:
            try:
                other_table_type = self.get_tabletype(other_table)
            except ValueError:
                continue
            if other_table_type == table_type:
                raise ValueError('components must not be added twice')

        self.tables.append(component)
        component._parent = self.tablegroup
        self.auto_constraints(component)
コード例 #10
0
ファイル: dataset.py プロジェクト: glottobank/pycldf
    def add_component(self, component, *cols):
        if isinstance(component, string_types):
            component = jsonlib.load(
                pkg_path('components', '{0}{1}'.format(component, MD_SUFFIX)))
        if isinstance(component, dict):
            component = Table.fromvalue(component)
        assert isinstance(component, Table)
        self.add_columns(component, *cols)
        try:
            table_type = self.get_tabletype(component)
        except ValueError:
            table_type = None
        for other_table in self.tables:
            try:
                other_table_type = self.get_tabletype(other_table)
            except ValueError:
                continue
            if other_table_type == table_type:
                raise ValueError('components must not be added twice')

        self.tables.append(component)
        component._parent = self.tablegroup
        self.auto_constraints(component)
コード例 #11
0
ファイル: dataset.py プロジェクト: glottobank/pycldf
    def from_metadata(cls, fname):
        fname = Path(fname)
        if fname.is_dir():
            name = '{0}{1}'.format(cls.__name__, MD_SUFFIX)
            tablegroup = TableGroup.from_file(pkg_path('modules', name))
            # adapt the path of the metadata file such that paths to tables are resolved
            # correctly:
            tablegroup._fname = fname.joinpath(name)
        else:
            tablegroup = TableGroup.from_file(fname)

        comps = Counter()
        for table in tablegroup.tables:
            try:
                comps.update([Dataset.get_tabletype(table)])
            except ValueError:
                pass
        if comps and comps.most_common(1)[0][1] > 1:
            raise ValueError('{0}: duplicate components!'.format(fname))

        for mod in get_modules():
            if mod.match(tablegroup):
                return mod.cls(tablegroup)
        return cls(tablegroup)
コード例 #12
0
    def validate(self, log=None, validators=None):
        validators = validators or []
        validators.extend(VALIDATORS)
        success = True
        default_tg = TableGroup.from_file(
            pkg_path('modules', '{0}{1}'.format(self.module, MD_SUFFIX)))
        for default_table in default_tg.tables:
            dtable_uri = default_table.common_props['dc:conformsTo']
            try:
                table = self[dtable_uri]
            except KeyError:
                log_or_raise('{0} requires {1}'.format(self.module, dtable_uri), log=log)
                success = False
                table = None

            if table:
                default_cols = {
                    c.propertyUrl.uri for c in default_table.tableSchema.columns
                    if c.required or c.common_props.get('dc:isRequiredBy')}
                cols = {
                    c.propertyUrl.uri for c in table.tableSchema.columns
                    if c.propertyUrl}
                table_uri = table.common_props['dc:conformsTo']
                for col in default_cols - cols:
                    log_or_raise('{0} requires column {1}'.format(table_uri, col), log=log)
                    success = False

        for table in self.tables:
            type_uri = table.common_props.get('dc:conformsTo')
            if type_uri:
                try:
                    TERMS.is_cldf_uri(type_uri)
                except ValueError:
                    success = False
                    log_or_raise('invalid CLDF URI: {0}'.format(type_uri), log=log)

            # FIXME: check whether table.common_props['dc:conformsTo'] is in validators!
            validators_ = []
            for col in table.tableSchema.columns:
                if col.propertyUrl:
                    col_uri = col.propertyUrl.uri
                    try:
                        TERMS.is_cldf_uri(col_uri)
                    except ValueError:
                        success = False
                        log_or_raise('invalid CLDF URI: {0}'.format(col_uri), log=log)
                for table_, col_, v_ in validators:
                    if (not table_ or table is self.get(table_)) and col is self.get((table, col_)):
                        validators_.append((col, v_))

            fname = Path(table.url.resolve(table._parent.base))
            if fname.exists():
                for fname, lineno, row in table.iterdicts(log=log, with_metadata=True):
                    for col, validate in validators_:
                        try:
                            validate(self, table, col, row)
                        except ValueError as e:
                            log_or_raise(
                                '{0}:{1}:{2} {3}'.format(fname.name, lineno, col.name, e),
                                log=log)
                            success = False
                if not table.check_primary_key(log=log):
                    success = False

        if not self.tablegroup.check_referential_integrity(log=log):
            success = False

        return success
コード例 #13
0
ファイル: dataset.py プロジェクト: glottobank/pycldf
    def validate(self, log=None, validators=None):
        validators = validators or []
        validators.extend(VALIDATORS)
        success = True
        default_tg = TableGroup.from_file(
            pkg_path('modules', '{0}{1}'.format(self.module, MD_SUFFIX)))
        for default_table in default_tg.tables:
            dtable_uri = default_table.common_props['dc:conformsTo']
            try:
                table = self[dtable_uri]
            except KeyError:
                log_or_raise('{0} requires {1}'.format(self.module, dtable_uri), log=log)
                success = False
                table = None

            if table:
                default_cols = {
                    c.propertyUrl.uri for c in default_table.tableSchema.columns
                    if c.required or c.common_props.get('dc:isRequiredBy')}
                cols = {
                    c.propertyUrl.uri for c in table.tableSchema.columns
                    if c.propertyUrl}
                table_uri = table.common_props['dc:conformsTo']
                for col in default_cols - cols:
                    log_or_raise('{0} requires column {1}'.format(table_uri, col), log=log)
                    success = False

        for table in self.tables:
            type_uri = table.common_props.get('dc:conformsTo')
            if type_uri:
                try:
                    TERMS.is_cldf_uri(type_uri)
                except ValueError:
                    success = False
                    log_or_raise('invalid CLDF URI: {0}'.format(type_uri), log=log)

            # FIXME: check whether table.common_props['dc:conformsTo'] is in validators!
            validators_ = []
            for col in table.tableSchema.columns:
                if col.propertyUrl:
                    col_uri = col.propertyUrl.uri
                    try:
                        TERMS.is_cldf_uri(col_uri)
                    except ValueError:
                        success = False
                        log_or_raise('invalid CLDF URI: {0}'.format(col_uri), log=log)
                for table_, col_, v_ in validators:
                    if (not table_ or table is self.get(table_)) and col is self.get((table, col_)):
                        validators_.append((col, v_))

            fname = Path(table.url.resolve(table._parent.base))
            if fname.exists():
                for fname, lineno, row in table.iterdicts(log=log, with_metadata=True):
                    for col, validate in validators_:
                        try:
                            validate(self, table, col, row)
                        except ValueError as e:
                            log_or_raise(
                                '{0}:{1}:{2} {3}'.format(fname.name, lineno, col.name, e),
                                log=log)
                            success = False
                if not table.check_primary_key(log=log):
                    success = False
            else:
                log_or_raise('{0} does not exist'.format(fname), log=log)
                success = False

        if not self.tablegroup.check_referential_integrity(log=log):
            success = False

        return success