def errs(fn):
    # Parse `fn` through the web resolver, require that iterating the parser
    # raises IncludeError, and hand back whatever errors the parser recorded.
    with self.assertRaises(IncludeError):
        target_doc = MetatabDoc()
        parser = TermParser(fn, resolver=WebResolver, doc=target_doc)
        # Exhaust the parser; the generated terms themselves are not needed.
        list(parser)

    return parser.errors_as_dict()
def errs(fn):
    # Parse the CSV file at `fn`, require that iterating the parser raises
    # IncludeError, and hand back whatever errors the parser recorded.
    with self.assertRaises(IncludeError):
        target_doc = MetatabDoc()
        row_source = CsvPathRowGenerator(fn)
        parser = TermParser(row_source, doc=target_doc)
        # Exhaust the parser; the generated terms themselves are not needed.
        list(parser)

    return parser.errors_as_dict()
def preprocess_cell(self, cell, resources, index):
    """Harvest Metatab terms from one notebook cell.

    * A cell whose source starts with ``%%metatab`` has that magic line
      removed and the remaining text loaded into ``self.doc`` as terms.
    * A markdown cell tagged ``Title`` or ``Description`` contributes a
      ``Root.Title`` / ``Root.Description`` entry to ``self.extra_terms``.
    * Any other cell is passed to the parent class's ``preprocess_cell``.

    Returns the ``(cell, resources)`` pair.
    """
    import re
    from metatab.rowgenerators import TextRowGenerator

    if not self.extra_terms:
        self.extra_terms = []

    if cell['source'].startswith('%%metatab'):
        # Drop the magic line itself; only the term lines are parsed.
        term_text = re.sub(r'\%\%metatab.*\n', '', cell['source'])
        parser = TermParser(TextRowGenerator(term_text),
                            resolver=self.doc.resolver, doc=self.doc)
        self.doc.load_terms(parser)
        return cell, resources

    if cell['cell_type'] == 'markdown':
        tags = cell['metadata'].get('tags', [])

        if 'Title' in tags:
            # NOTE(review): '#' is removed after strip(), so "# Title" keeps
            # its leading space -- confirm whether consumers strip it again.
            title = cell.source.strip().replace('#', '')
            self.extra_terms.append(('Root', 'Root.Title', title))
        elif 'Description' in tags:
            description = cell.source.strip()
            self.extra_terms.append(('Root', 'Root.Description', description))

        return cell, resources

    # NOTE(review): the fall-through to the parent is read as pairing with
    # the cell-type dispatch (not the tag checks) -- confirm against the
    # original layout.
    cell, resources = super().preprocess_cell(cell, resources, index)
    return cell, resources
def test_line_doc_parts(self):
    # Assemble one document from five line-oriented fragments, loading each
    # fragment's terms into the same doc, then check the combined result.
    doc = MetatabDoc(TextRowGenerator("Declare: metatab-latest"))

    fragment_names = (
        'line/line-oriented-doc-root.txt',
        'line/line-oriented-doc-contacts.txt',
        'line/line-oriented-doc-references-1.txt',
        'line/line-oriented-doc-references-2.txt',
        'line/line-oriented-doc-bib.txt',
    )

    for fragment_name in fragment_names:
        with open(test_data(fragment_name)) as fragment_file:
            fragment_text = fragment_file.read()

        parser = TermParser(TextRowGenerator(fragment_text),
                            resolver=doc.resolver, doc=doc)
        doc.load_terms(parser)

    self.assertEqual('47bc1089-7584-41f0-b804-602ec42f1249',
                     doc.get_value('Root.Identifier'))
    self.assertEqual(152, len(doc.terms))

    # The References section holds five entries, all findable as Resources.
    self.assertEqual(5, len(list(doc['References'])))
    resource_terms = list(doc['References'].find('Root.Resource'))
    self.assertEqual(5, len(resource_terms))
def test_line_doc(self):
    # Load a complete line-oriented document into a doc that already has the
    # 'metatab-latest' declaration, then check identifier and term counts.
    doc = MetatabDoc(TextRowGenerator("Declare: metatab-latest"))

    with open(test_data('line/line-oriented-doc.txt')) as doc_file:
        doc_text = doc_file.read()

    parser = TermParser(TextRowGenerator(doc_text), resolver=doc.resolver, doc=doc)
    doc.load_terms(parser)

    self.assertEqual('47bc1089-7584-41f0-b804-602ec42f1249',
                     doc.get_value('Root.Identifier'))
    self.assertEqual(152, len(doc.terms))

    self.assertEqual(5, len(list(doc['References'])))

    # References are Resources, so both term names must match all five.
    for term_name in ('Root.Reference', 'Root.Resource'):
        self.assertEqual(5, len(list(doc['References'].find(term_name))))

    first_reference = list(doc['References'].find('Root.Resource'))[0]
    print(type(first_reference))
def add_term_lines(self, text):
    """Parse ``text`` as line-oriented terms and load them into ``self.mt_doc``.

    Asserts beforehand that a term class is registered for
    ``'root.reference'`` on ``TermParser``.
    """
    assert 'root.reference' in TermParser.term_classes

    rows = TextRowGenerator(text)
    parser = TermParser(rows, resolver=self.mt_doc.resolver, doc=self.mt_doc)

    self.mt_doc.load_terms(parser)
def load_declarations(self, decls):
    """Merge the terms and sections declared by ``decls`` into this document.

    Each entry of ``decls`` becomes a ``Declare`` row; the rows are run
    through a ``TermParser`` and the resulting declaration dictionary's
    ``'terms'`` and ``'sections'`` are folded into ``self.decl_terms`` and
    ``self.decl_sections``.

    Returns ``self`` so calls can be chained.
    """
    declare_rows = [['Declare', declaration] for declaration in decls]
    parser = TermParser(generateRows(declare_rows, cache=self._cache), doc=self)

    # Run the parser to completion; only its declare_dict is used afterwards.
    list(parser)

    declared = parser.declare_dict
    self.decl_terms.update(declared['terms'])
    self.decl_sections.update(declared['sections'])

    return self
def test_term_subclasses(self):
    # Check the default term-class lookup, then register a custom class for
    # 'root.name' and confirm both the parser and a freshly loaded document
    # pick it up.
    from metatab.terms import Term, SectionTerm
    from metatab import WebResolver

    doc = MetatabDoc()
    parser = TermParser(test_data('example1.csv'), resolver=WebResolver, doc=doc)

    # Run the parser to completion before querying term classes.
    list(parser)

    default_classes = (
        ('root.summary', Term),
        ('root.name', Term),
        ('root.section', SectionTerm),
    )
    for term_name, expected_class in default_classes:
        self.assertEqual(expected_class, parser.get_term_class(term_name))

    # Disabled checks, kept for reference:
    # self.assertEqual(Resource, tp.get_term_class('root.resource'))
    # self.assertEqual(Resource, tp.get_term_class('root.homepage'))

    class TestTermClass(Term):
        pass

    try:
        TermParser.register_term_class('root.name', TestTermClass)

        self.assertEqual(TestTermClass, parser.get_term_class('root.name'))

        doc = MetatabDoc(test_data('example1.csv'))

        self.assertEqual(Term, type(doc.find_first('root.description')))
        self.assertEqual(TestTermClass, type(doc.find_first('root.name')))

        # Disabled checks, kept for reference:
        # self.assertEqual(Resource, type(doc.find_first('root.datafile')))
        # self.assertEqual(Resource, type(doc.find_first('root.homepage')))
    finally:
        # Some test environments seem to run tests multiple times in the same
        # interpreter; leaving this registration active would make the
        # 'root.name' check above fail on the next run.
        TermParser.unregister_term_class('root.name')
def test_line_doc_parts(self):
    # Assemble one document from six line-oriented fragments, loading each
    # fragment's terms into the same doc, then check the combined result.
    doc = MetapackDoc(TextRowGenerator("Declare: metatab-latest"))

    fragment_names = (
        'line/line-oriented-doc-root.txt',
        'line/line-oriented-doc-contacts.txt',
        'line/line-oriented-doc-datafiles.txt',
        'line/line-oriented-doc-references-1.txt',
        'line/line-oriented-doc-references-2.txt',
        'line/line-oriented-doc-bib.txt',
    )

    for fragment_name in fragment_names:
        with open(test_data(fragment_name)) as fragment_file:
            fragment_text = fragment_file.read()

        parser = TermParser(TextRowGenerator(fragment_text),
                            resolver=doc.resolver, doc=doc)
        doc.load_terms(parser)

    self.assertEqual('47bc1089-7584-41f0-b804-602ec42f1249',
                     doc.get_value('Root.Identifier'))
    self.assertEqual(157, len(doc.terms))

    # References are Resources, so both term names must match all five.
    self.assertEqual(5, len(list(doc['References'])))
    for term_name in ('Root.Reference', 'Root.Resource'):
        self.assertEqual(5, len(list(doc['References'].find(term_name))))

    first_reference = list(doc['References'].find('Root.Resource'))[0]
    self.assertIsInstance(first_reference, Reference)

    # Datafiles are Resources, so both term names must match all five.
    self.assertEqual(5, len(list(doc['Resources'])))
    for term_name in ('Root.Datafile', 'Root.Resource'):
        self.assertEqual(5, len(list(doc['Resources'].find(term_name))))

    first_resource = list(doc['Resources'].find('Root.Resource'))[0]
    self.assertIsInstance(first_resource, Resource)

    # Rendering to HTML only has to complete without raising.
    doc._repr_html_()
def load_rows(self, row_generator):
    """Wrap ``row_generator`` in a ``TermParser`` and load its terms.

    Returns whatever ``self.load_terms`` returns.
    """
    return self.load_terms(TermParser(row_generator))
def __init__(self, ref=None, decl=None, package_url=None, cache=None, clean_cache=False):
    """Create a document, optionally loading its terms from ``ref``.

    :param ref: Source reference handed to ``TermParser``. When falsy, an
        empty document with only a ``Root`` section is created.
    :param decl: A declaration, or a mutable sequence of declarations, to
        load before any terms. ``None`` means no declarations.
    :param package_url: Stored unchanged on ``self.package_url``.
    :param cache: Cache object; ``get_cache()`` is used when this is falsy.
    :param clean_cache: Accepted but not used by this constructor.
    :raises MetatabError: if loading terms from ``ref`` raises ``SourceError``.
    """
    self._cache = cache or get_cache()

    self.decl_terms = {}
    self.decl_sections = {}

    self.terms = []
    self.sections = OrderedDict()
    self.errors = []

    self.package_url = package_url

    # Disabled path normalisation, kept for reference:
    # if Url(self.package_url).proto == 'file':
    #     path = abspath(parse_url_to_dict(self.package_url)['path'])
    #     self.package_url = reparse_url(self.package_url, path = path)

    # Normalise `decl` so self.decls is always a sequence.
    if isinstance(decl, MutableSequence):
        self.decls = decl
    elif decl is None:
        self.decls = []
    else:
        self.decls = [decl]

    self.load_declarations(self.decls)

    if not ref:
        # No source: start from an empty document with just a Root section.
        self._ref = None
        self._term_parser = None
        self.root = SectionTerm('Root', term='Root', doc=self, row=0, col=0,
                                file_name=None, parent=None)
        self.add_section(self.root)
        self._mtime = time()
        return

    self._ref = ref
    self.root = None
    self._term_parser = TermParser(self._ref, doc=self)

    try:
        self.load_terms(self._term_parser)
    except SourceError as e:
        message = "Failed to load terms for document '{}': {}".format(self._ref, e)
        raise MetatabError(message)

    # Only file-scheme references have a modification time; everything
    # else, and files that cannot be stat'ed, gets 0.
    source_url = Url(self._ref)
    if source_url.scheme == 'file':
        try:
            modified = getmtime(source_url.parts.path)
        except (FileNotFoundError, OSError):
            modified = 0
    else:
        modified = 0

    self._mtime = modified
def test_declarations(self):
    # Exercise declaration loading three ways: implicitly when a document is
    # opened, directly through TermParser.install_declare_terms(), and via an
    # explicit 'Declare' term.
    #
    # Uses assertEqual throughout: the assertEquals alias is deprecated and
    # was removed from unittest in Python 3.12.
    expected_sections = {
        'contacts', 'declaredterms', 'declaredsections', 'root', 'resources',
        'schemas', 'sources', 'documentation', 'data'
    }

    doc = MetatabDoc(test_data('example1.csv'))

    d = {k: v for k, v in doc.decl_terms.items() if 'homepage' in k}

    self.assertEqual(17, len(d))

    self.assertIn("homepage.mediatype", d)
    self.assertIn("homepage.hash", d)
    self.assertIn("homepage.title", d)

    # Direct use of function
    ti = TermParser(
        CsvPathRowGenerator(declaration_path('metatab-latest')), False)
    ti.install_declare_terms()

    fn = test_data('example1.csv')  # Not actually used. Sets base directory

    doc = MetatabDoc(
        MetatabRowGenerator([['Declare', 'metatab-latest']], fn))

    terms = doc.decl_terms

    self.assertIn('root.homepage', terms.keys())
    self.assertIn('documentation.description', terms.keys())
    self.assertEqual(247, len(terms.keys()))

    sections = doc.decl_sections

    self.assertEqual(expected_sections, set(sections.keys()))

    # Use the Declare term
    fn = test_data('example1.csv')
    doc = MetatabDoc(CsvPathRowGenerator(fn))

    d = doc._term_parser.declare_dict

    self.assertEqual({'terms', 'synonyms', 'sections'}, set(d.keys()))

    terms = d['terms']

    self.assertIn('root.homepage', terms.keys())
    self.assertIn('documentation.description', terms.keys())
    self.assertEqual(247, len(terms.keys()))

    sections = d['sections']

    self.assertEqual(expected_sections, set(sections.keys()))

    self.assertEqual(['Email', 'Organization', 'Tel', 'Url'],
                     sections['contacts']['args'])
    self.assertEqual(['TermValueName', 'ChildPropertyType', 'Section'],
                     sections['declaredterms']['args'])
    self.assertEqual(['DataType', 'ValueType', 'Description'],
                     sections['schemas']['args'])
def doc(self):
    """Return the Metatab metadata document.

    The document is built from ``self._ref`` the first time it is needed and
    kept on ``self._doc``; when there is no reference, the current value of
    ``self._doc`` is returned as-is.
    """
    # Nothing to build: either a document is already held or there is no
    # reference to build one from.
    if self._doc or not self._ref:
        return self._doc

    self._doc = MetatabDoc(TermParser(self._ref))
    return self._doc