Python CapitainsCtsText.getValidReffの例

プログラミング言語: Python

名前空間/パッケージ名: MyCapytain.resources.texts.local.capitains.cts

クラス/型: CapitainsCtsText

メソッド/関数: getValidReff

hotexamples.comのコード掲載数: 6

Python CapitainsCtsText.getValidReff - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのMyCapytain.resources.texts.local.capitains.cts.CapitainsCtsText.getValidReffの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

CapitainsCtsText(19)

getTextualNode(7)

getValidReff(6)

getReffs(4)

export(2)

citation(1)

コード例 #1

0

ファイルを表示

ファイル: commonTests.py プロジェクト: rillian/MyCapytain

    def test_warning(self):
        """ Check the warnings recorded by getValidReff on the duplicate_references fixture

        getValidReff is called in debug mode for levels 1 to 3; three warnings are expected in total,
        the last one being a DuplicateReference and the first one carrying the message "1".
        """
        with open("tests/testing_data/texts/duplicate_references.xml") as source:
            text = CapitainsCtsText(resource=source)

        with warnings.catch_warnings(record=True) as caught:
            # Make sure every warning is recorded, even when it is triggered repeatedly
            warnings.simplefilter("always")
            for level in (1, 2, 3):
                text.getValidReff(level=level, _debug=True)

        self.assertEqual(len(caught), 3, "There should be warning on each level")

        last_is_duplicate = issubclass(caught[-1].category, MyCapytain.errors.DuplicateReference)
        self.assertEqual(last_is_duplicate, True, "Warning should be DuplicateReference")

        first_message = str(caught[0].message)
        self.assertEqual(first_message, "1", "Warning message should be list of duplicate")

コード例 #2

0

ファイルを表示

ファイル: commonTests.py プロジェクト: rillian/MyCapytain

 def test_get_passage_hypercontext_complex_xpath(self):
     """ Re-parse the passage "pr.1-1.2" of text_complex as a text of its own and check that
     passages and level 2 references can still be retrieved from it.
     """
     message = "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range CapitainsCtsPassage)"

     # Serialize the extracted range and feed it back to a new text sharing the same citation scheme
     excerpt = self.text_complex.getTextualNode(Reference("pr.1-1.2"))
     text = CapitainsCtsText(
         resource=excerpt.tostring(encoding=str),
         citation=self.text_complex.citation
     )

     preface = text.getTextualNode(Reference("pr.1"), simple=True)
     preface_plain = preface.export(output=Mimetypes.PLAINTEXT, exclude=["tei:note"])
     self.assertIn("Pervincis tandem", preface_plain.strip(), message)

     line = text.getTextualNode(Reference("1.2"), simple=True)
     self.assertEqual(
         line.export(output=Mimetypes.PLAINTEXT).strip(),
         "lusimus quos in Suebae gratiam virgunculae,",
         message
     )

     found = [str(reference) for reference in text.getValidReff(level=2)]
     self.assertEqual(found, ["pr.1", "1.1", "1.2"], message)

コード例 #3

0

ファイルを表示

ファイル: commonTests.py プロジェクト: rillian/MyCapytain

    def test_empty_ref_warning(self):
        """ Check the warnings recorded by getValidReff on the empty_references fixture

        getValidReff is called in debug mode for levels 1 to 3; three warnings are expected, the last
        one being an EmptyReference, and each message must give the count and the citation level.
        """
        with open("tests/testing_data/texts/empty_references.xml") as source:
            text = CapitainsCtsText(resource=source)

        with warnings.catch_warnings(record=True) as caught:
            # Make sure every warning is recorded, even when it is triggered repeatedly
            warnings.simplefilter("always")
            for level in (1, 2, 3):
                text.getValidReff(level=level, _debug=True)

        self.assertEqual(len(caught), 3, "There should be warning on each level")

        last_is_empty = issubclass(caught[-1].category, MyCapytain.errors.EmptyReference)
        self.assertEqual(last_is_empty, True, "Warning should be EmptyReference")

        messages = [str(warning.message) for warning in caught]
        expected = [
            "1 empty reference(s) at citation level 1",
            "1 empty reference(s) at citation level 2",
            "1 empty reference(s) at citation level 3"
        ]
        self.assertEqual(
            messages, expected,
            "Warning message should indicate number of references and the level at which they occur"
        )

コード例 #4

0

ファイルを表示

ファイル: commonTests.py プロジェクト: rillian/MyCapytain

    def test_get_passage_hyper_context_double_slash_xpath(self):
        """ Re-parse passages extracted from the seneca text (a range, then a single reference) as
        texts of their own and check that passages and level 1 references can be retrieved from them.
        """
        message = "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range CapitainsCtsPassage)"

        def plain(document, reference, **export_options):
            # Plain text of one passage of `document`, stripped of surrounding whitespace
            node = document.getTextualNode(Reference(reference), simple=True)
            return node.export(output=Mimetypes.PLAINTEXT, **export_options).strip()

        # First case: a range of ten passages, serialized through export()
        excerpt = self.seneca.getTextualNode(Reference("1-10"))
        text = CapitainsCtsText(
            resource=excerpt.export(output=Mimetypes.XML.Std),
            citation=self.seneca.citation
        )
        self.assertEqual(
            plain(text, "1", exclude=["tei:note"]),
            "Di coniugales tuque genialis tori,",
            message
        )
        self.assertEqual(
            plain(text, "10"),
            "aversa superis regna manesque impios",
            message
        )
        self.assertEqual(
            [str(reference) for reference in text.getValidReff(level=1)],
            ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
            message
        )

        # Second case: a single passage, serialized through tostring()
        excerpt = self.seneca.getTextualNode(Reference("1"))
        text = CapitainsCtsText(
            resource=excerpt.tostring(encoding=str),
            citation=self.seneca.citation
        )
        self.assertEqual(
            plain(text, "1", exclude=["tei:note"]),
            "Di coniugales tuque genialis tori,",
            message
        )
        self.assertEqual(
            [str(reference) for reference in text.getValidReff(level=1)],
            ["1"],
            message
        )

コード例 #5

0

ファイルを表示

ファイル: commonTests.py プロジェクト: rillian/MyCapytain

    def test_get_Passage_context_no_double_slash(self):
        """ Re-parse passages extracted from self.TEI as texts of their own and check their content

        Four extractions are covered: a single reference, a range within one parent, a range across
        parents at the same level, and a range whose bounds are at different levels.
        """
        one_reference = "Ensure passage finding with context is fully TEI / Capitains compliant (One reference CapitainsCtsPassage)"
        same_parent = "Ensure passage finding with context is fully TEI / Capitains compliant (Same level same parent range CapitainsCtsPassage)"
        same_level = "Ensure passage finding with context is fully TEI / Capitains compliant (Same level range CapitainsCtsPassage)"
        different_level = "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range CapitainsCtsPassage)"

        # Expected level 3 references, grouped by their parent
        preface_refs = [
            "1.pr.2", "1.pr.3", "1.pr.4", "1.pr.5", "1.pr.6", "1.pr.7", "1.pr.8",
            "1.pr.9", "1.pr.10", "1.pr.11", "1.pr.12", "1.pr.13", "1.pr.14", "1.pr.15",
            "1.pr.16", "1.pr.17", "1.pr.18", "1.pr.19", "1.pr.20", "1.pr.21", "1.pr.22",
        ]
        first_poem_refs = ["1.1.1", "1.1.2", "1.1.3", "1.1.4", "1.1.5", "1.1.6"]
        second_poem_refs = ["1.2.1", "1.2.2", "1.2.3", "1.2.4", "1.2.5", "1.2.6", "1.2.7", "1.2.8"]

        def rebuild(reference):
            # Extract `reference` from self.TEI and parse its serialization as a new text
            passage = self.TEI.getTextualNode(Reference(reference))
            return CapitainsCtsText(
                resource=passage.tostring(encoding=str),
                citation=self.TEI.citation
            )

        def plain(document, reference):
            # Plain text of one passage of `document`, stripped of surrounding whitespace
            node = document.getTextualNode(Reference(reference), simple=True)
            return node.export(output=Mimetypes.PLAINTEXT).strip()

        def level_three(document):
            return [str(reference) for reference in document.getValidReff(level=3)]

        # Single reference
        text = rebuild("1.pr.2")
        self.assertEqual(
            plain(text, "1.pr.2"),
            "tum, ut de illis queri non possit quisquis de se bene",
            one_reference
        )

        # Range inside one parent
        text = rebuild("1.pr.2-1.pr.7")
        self.assertEqual(
            plain(text, "1.pr.2"),
            "tum, ut de illis queri non possit quisquis de se bene",
            same_parent
        )
        self.assertEqual(
            plain(text, "1.pr.3"),
            "senserit, cum salva infimarum quoque personarum re-",
            same_parent
        )
        self.assertEqual(
            level_three(text),
            ["1.pr.2", "1.pr.3", "1.pr.4", "1.pr.5", "1.pr.6", "1.pr.7"],
            same_parent
        )

        # Range across two parents, both bounds at level 3
        text = rebuild("1.pr.2-1.1.6")
        self.assertEqual(
            plain(text, "1.pr.2"),
            "tum, ut de illis queri non possit quisquis de se bene",
            same_level
        )
        self.assertEqual(
            plain(text, "1.1.6"),
            "Rari post cineres habent poetae.",
            same_level
        )
        self.assertEqual(
            level_three(text),
            preface_refs + first_poem_refs,
            same_level
        )

        # Range whose end bound is one level higher than its start bound
        text = rebuild("1.pr.2-1.2")
        self.assertEqual(
            plain(text, "1.pr.2"),
            "tum, ut de illis queri non possit quisquis de se bene",
            different_level
        )
        self.assertEqual(
            plain(text, "1.1.6"),
            "Rari post cineres habent poetae.",
            different_level
        )
        self.assertEqual(
            level_three(text),
            preface_refs + first_poem_refs + second_poem_refs,
            different_level
        )

コード例 #6

0

ファイルを表示

ファイル: cts.py プロジェクト: rillian/HookTest

class CTSText_TestUnit(TESTUnit):
    """ CTS testing object

    :param path: Path to the file
    :type path: basestring
    :param countwords: Count the number of words and log it if necessary
    :type countwords: bool
    :param timeout: Number of seconds given to the RelaxNG validation and to the wait for a RNG download
    :type timeout: int

    :cvar tests: Contains the list of methods to be run against the text
    :type tests: [str]
    :cvar breaks: Names of the tests whose failure stops the run (remaining tests are reported as failed)
    :type breaks: [str]
    :cvar readable: Human friendly string associated to object methods
    :type readable: dict

    :ivar inv: List of URN retrieved in metadata. Used to check the availability of metadata for the text
    :type inv: [str]
    :ivar scheme: Name of the schema validation to run first ("tei", "epidoc", "auto_rng" or "local_file")
    :type scheme: str
    :ivar Text: Text object according to MyCapytains parsing. Used to find passages
    :type Text: MyCapytain.resources.text.local.Text

    Shared variables with parent class:

    :ivar path: Path for the resource
    :type path: str
    :ivar xml: XML resource, parsed in python. Used to do general checking
    :type xml: lxml._etree.Element

    .. note:: All method in CTSText_TestUnit.tests ( "parsable", "has_urn", "naming_convention", "refsDecl", "passages", \
    "unique_passage", "inventory" ) yield at least one boolean (might be more) which represents the success of it.
    """

    tests = [
        # Parsing the XML
        "parsable",
        # Retrieving the URN (requires parsable)
        "has_urn", 'language',
        # Requires has_urn
        "inventory", "naming_convention",
        # Requires parsable
        "refsDecl", "passages", "unique_passage", "duplicate", "forbidden", "empty"
    ]
    breaks = [
        "parsable",
        "refsDecl",
        "passages"
    ]
    readable = {
        "parsable": "File parsing",
        "refsDecl": "RefsDecl parsing",
        "passages": "Passage level parsing",
        "duplicate": "Duplicate passages",
        "forbidden": "Forbidden characters",
        "epidoc": "Epidoc DTD validation",
        "tei": "TEI DTD Validation",
        "auto_rng": "Automatic RNG validation",
        "local_file": "Custom local RNG validation",
        "has_urn": "URN informations",
        "naming_convention": "Naming conventions",
        "inventory": "Available in inventory",
        "unique_passage": "Unique nodes found by XPath",
        "count_words": "Word Counting",
        "language": "Correct xml:lang attribute",
        "empty": "Empty References"
    }
    splitter = re.compile(r'\S+', re.MULTILINE)

    def __init__(self, path, countwords=False, timeout=30, *args, **kwargs):
        self.inv = list()
        self.timeout = timeout
        self.scheme = None
        self.guidelines = None
        self.rng = None
        self.Text = None
        self.xml = None
        # has_urn() only assigns the URN when one is found, while naming_convention(), inventory()
        # and count_words() read it unconditionally: make sure the attribute always exists.
        self.urn = None
        self.count = 0
        self.countwords = countwords
        self.citation = list()
        self.duplicates = list()
        self.forbiddens = list()
        self.empties = list()
        self.capitains_errors = list()
        self.test_status = defaultdict(bool)
        self.lang = ''
        self.dtd_errors = list()
        super(CTSText_TestUnit, self).__init__(path, *args, **kwargs)

    def parsable(self):
        """ Check that the text is parsable (as XML) and ingest it through MyCapytain then.

        Yields False when MyCapytain reports a missing refsDecl, then the status of the XML parsing.

        .. note:: Override super(parsable) and add CapiTainS Ingesting to it
        """
        status = next(
            super(CTSText_TestUnit, self).parsable()
        )
        if status is True:
            try:
                self.Text = CapitainsCtsText(resource=self.xml.getroot())
            except MissingRefsDecl as E:
                self.Text = None
                self.log(str(E))
                self.capitains_errors.append(str(E))
                yield False
        else:
            self.Text = None
        yield status

    def refsDecl(self):
        """ Check that the text contains refsDecl informations
        """
        # In 1.0.1, MyCapytain actually create an empty citation by default
        if self.Text and not self.Text.citation.isEmpty():
            self.log(str(len(self.Text.citation)) + " citation's level found")
            yield True
        else:
            yield False

    def run_rng(self, rng_path):
        """ Run the RNG through JingTrang

        The java process is killed by a timer after ``self.timeout`` seconds. Every line reported by
        Jing on its standard output is logged and stored in ``self.dtd_errors``.

        :param rng_path: Path to the RelaxNG file to run against the XML to test
        :returns: Yields False on error or timeout, then whether both outputs of Jing were empty
        """
        test = subprocess.Popen(
            ["java", "-Duser.country=US",  "-Duser.language=en", "-jar", TESTUnit.JING, rng_path, self.path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=False
        )
        # Bytes defaults: both values are decoded below even when communicate() failed
        out = b""
        error = b""
        timer = Timer(self.timeout, test.kill)
        try:
            timer.start()
            out, error = test.communicate()
        except Exception as E:
            self.error(E)
            yield False
        finally:
            # A timer which is not running anymore at this point is treated as having fired,
            # which means the process was killed
            if not timer.is_alive():
                self.log("Timeout on RelaxNG")
                yield False
            timer.cancel()

        # This is to deal with Travis printing a message about the _JAVA_OPTIONS when a java command is run
        # Travis printing this command resulted in this test not passing
        out = '\n'.join([x for x in out.decode().split('\n') if '_JAVA_OPTIONS' not in x]).encode()
        error = '\n'.join([x for x in error.decode().split('\n') if '_JAVA_OPTIONS' not in x]).encode()

        if len(out) > 0:
            for issue in TESTUnit.rng_logs(out):
                self.log(issue)
                self.dtd_errors.append(issue)
        yield len(out) == 0 and len(error) == 0

    def auto_rng(self):
        """ Check the original file against every schema declared in its xml-model processing instructions

        A remote schema is fetched through get_remote_rng; a local one is resolved relatively to
        the directory of the tested file. A missing local schema yields False and is recorded in
        ``self.dtd_errors``.
        """
        xml = parse(self.path)
        xml_dir = os.path.dirname(os.path.abspath(self.path))
        # A file can have multiple schema
        for rng in xml.xpath("/processing-instruction('xml-model')"):
            uri = rng.attrib["href"]
            rng_path = os.path.abspath(os.path.join(xml_dir, uri))
            if validators.url(uri):
                rng_path = self.get_remote_rng(uri)
            elif not os.path.isfile(rng_path):
                self.dtd_errors.append("No RNG was found at " + rng_path)
                yield False
                continue
            for status in self.run_rng(rng_path):
                yield status

    def get_remote_rng(self, url):
        """ Given a valid URL, downloads the RNG from the given URL and returns the filepath and name

        Downloaded files are kept in the ".rngs" directory under a name derived from the md5 hash of
        the URL. A "-indownload" marker file signals that a download is in progress.

        :param url: the URL of the RNG
        :return: filepath and name where the RNG was saved
        :raises EnvironmentError: if a download started elsewhere does not finish within ``self.timeout`` seconds
        """
        # If the file is remote, have a file-system approved name
        # The md5 hash seems like a good option
        digest = md5(url.encode()).hexdigest()

        # We have a name for the rng file but also for the in-download marker
        makedirs(".rngs", exist_ok=True)
        stable_local = os.path.join(".rngs", digest+".rng")
        stable_local_downloading = os.path.join(".rngs", digest+".rng-indownload")

        # The rng was already downloaded: use it right away
        if os.path.exists(stable_local):
            return stable_local
        # We check if the in-download proof file is shown here
        # Until the in-download marker is there, we need to wait
        elif os.path.exists(stable_local_downloading):
            # Have it as a constant that could be changed in environment variables ?
            waited = self.timeout
            while not os.path.exists(stable_local):
                time.sleep(1)
                waited -= 1
                if waited < 0:
                    # Maybe we can wait more ?
                    raise EnvironmentError("The download of the RNG took too long")
        else:
            with open(stable_local_downloading, "w") as f:
                f.write("Downloading...")
            try:
                data = requests.get(url)
                data.raise_for_status()
                with open(stable_local_downloading, "w") as f:
                    f.write(data.text)
            except Exception:
                # A marker left behind would make every later run wait for a download which
                # is not happening, until it times out
                if os.path.exists(stable_local_downloading):
                    os.remove(stable_local_downloading)
                raise
            shutil.move(stable_local_downloading, stable_local)

        return stable_local

    def epidoc(self):
        """ Check the original file against Epidoc rng through a java pipe
        """
        for status in self.run_rng(TESTUnit.EPIDOC):
            yield status

    def tei(self):
        """ Check the original file against TEI rng through a java pipe
        """
        for status in self.run_rng(TESTUnit.TEI_ALL):
            yield status

    def local_file(self):
        """ Check the original file against the custom RNG given to test() (``self.rng``) through a java pipe
        """
        for status in self.run_rng(self.rng):
            yield status

    def passages(self):
        """  Check that passages are available at each level. On top of that, it checks for forbidden characters \
        and duplicate in references

        Results are accumulated in ``self.citation``, ``self.duplicates``, ``self.empties`` and
        ``self.forbiddens``. The check stops at the first level which has no passage or raises.
        """
        if self.Text and self.Text.citation.refsDecl:
            citations = [c.name for c in self.Text.citation]
            for i in range(0, len(self.Text.citation)):
                try:
                    with warnings.catch_warnings(record=True) as warning_record:
                        # Cause all warnings to always be triggered.
                        warnings.simplefilter("always")
                        passages = self.Text.getValidReff(level=i+1, _debug=True)
                        # Only keep the identifier of the current level of each reference
                        ids = [ref.split(".", i)[-1] for ref in passages]
                        has_forbidden_char = TESTUnit.FORBIDDEN_CHAR.search("".join(ids))
                        len_passage = len(passages)
                        status = len_passage > 0
                        self.log(str(len_passage) + " found")
                        self.citation.append((i, len_passage, citations[i]))
                        for record in warning_record:
                            if record.category == DuplicateReference:
                                self.duplicates += sorted(str(record.message).split(", "))
                            if record.category == EmptyReference:
                                self.empties += [str(record.message)]
                        if has_forbidden_char:
                            self.forbiddens += ["'{}'".format(passage)
                                                for identifier, passage in zip(ids, passages)
                                                if TESTUnit.FORBIDDEN_CHAR.search(identifier)]
                        yield status
                        if status is False:
                            break
                except Exception as E:
                    self.error(E)
                    self.log("Error when searching passages at level {0}".format(i+1))
                    yield False
                    break
        else:
            yield False

    def duplicate(self):
        """ Detects duplicate references

        Relies on the results gathered by passages() and fails when that test failed.
        """
        if self.duplicates:
            self.log("Duplicate references found : {0}".format(", ".join(self.duplicates)))
            yield False
        elif self.test_status['passages'] is False:
            yield False
        else:
            yield True

    def forbidden(self):
        """ Checks for forbidden characters in references

        Relies on the results gathered by passages() and fails when that test failed.
        """
        if self.forbiddens:
            self.log("Reference with forbidden characters found: {0}".format(", ".join(self.forbiddens)))
            yield False
        elif self.test_status['passages'] is False:
            yield False
        else:
            yield True

    def empty(self):
        """ Detects empty references

        Relies on the results gathered by passages() and fails when that test failed.
        """
        if self.empties:
            self.log("Empty references found : {0}".format(", ".join(self.empties)))
            yield False
        elif self.test_status['passages'] is False:
            yield False
        else:
            yield True

    def unique_passage(self):
        """ Check that citation scheme do not collide (eg. Where text:1 would be the same node as text:1.1)
        """
        try:
            # Nodes matched by the XPath of each citation level
            xpaths = [
                self.Text.xml.xpath(
                    MyCapytain.common.reference.REFERENCE_REPLACER.sub(
                        r"\1",
                        citation.refsDecl
                    ),
                    namespaces=TESTUnit.NS
                )
                for citation in self.Text.citation
            ]
            nodes = [element for xpath in xpaths for element in xpath]
            # A node matched by two levels appears twice in the list but once in the set
            all_unique = len(nodes) == len(set(nodes))
            if not all_unique:
                self.log("Some node are found twice")
                yield False
            else:
                yield True
        except Exception:
            # Any failure while evaluating the XPaths is reported as a failed test
            yield False

    def has_urn(self):
        """ Test that a file has its urn according to CapiTainS Guidelines in its scheme

        The URN found is stored in ``self.urn``, even when it is then reported as invalid.
        """
        if self.xml is not None:
            if self.guidelines == "2.tei":
                urns = self.xml.xpath("//tei:text/tei:body[starts-with(@n, 'urn:cts:')]", namespaces=TESTUnit.NS) + \
                        self.xml.xpath("//tei:text[starts-with(@xml:base, 'urn:cts:')]", namespaces=TESTUnit.NS)
            else:
                urns = self.xml.xpath(
                    "//tei:body/tei:div[@type='edition' and starts-with(@n, 'urn:cts:')]",
                    namespaces=TESTUnit.NS
                )
                urns += self.xml.xpath(
                    "//tei:body/tei:div[@type='translation' and starts-with(@n, 'urn:cts:')]",
                    namespaces=TESTUnit.NS
                )
                urns += self.xml.xpath(
                    "//tei:body/tei:div[@type='commentary' and starts-with(@n, 'urn:cts:')]",
                    namespaces=TESTUnit.NS
                )
            status = len(urns) > 0
            if status:
                # The URN is read from @n, or from the base of the node when @n is missing
                logs = urns[0].get("n")
                if not logs:
                    logs = urns[0].base
                urn = MyCapytain.common.reference.URN(logs)
                missing_members = [
                    key for key in ['namespace', 'work', 'version', 'textgroup']
                    if getattr(urn, key) is None or len(getattr(urn, key)) == 0
                ]
                if len(urn) < 5:
                    status = False
                    self.log("Incomplete URN")
                elif urn.reference:
                    status = False
                    self.log("Reference not accepted in URN")
                elif len(missing_members) > 0:
                    status = False
                    self.log("Elements of URN are empty: {}".format(", ".join(sorted(missing_members))))
                self.urn = logs
        else:
            status = False
        yield status

    def naming_convention(self):
        """ Check the naming convention of the file: the last member of the URN must be part of the path
        """
        if self.urn:
            yield self.urn.split(":")[-1] in self.path
        else:
            yield False

    def inventory(self):
        """ Check that the URN of the file is part of the inventory (``self.inv``)
        """
        if self.urn and self.inv:
            yield self.urn in self.inv
        else:
            yield False

    def count_words(self):
        """ Count words in a file

        Notes and the teiHeader are excluded. The count is stored in ``self.count`` and is only
        computed when the passages test succeeded.
        """
        status = False
        if self.test_status["passages"]:
            text = self.Text.export(Mimetypes.PLAINTEXT, exclude=["tei:note", "tei:teiHeader"])
            self.count = len(type(self).splitter.findall(text))

            self.log("{} has {} words".format(self.urn, self.count))
            status = self.count > 0
        yield status

    def language(self):
        """ Tests to make sure an xml:lang element is on the correct node

        The language found is stored in ``self.lang``, which is set to "UNK" when none is available.
        """
        # Guidelines other than the two handled below have no known URN holding node
        urns_holding_node = []
        if self.guidelines == "2.epidoc":
            urns_holding_node = self.xml.xpath(
                "//tei:text/tei:body/tei:div"
                "[@type='edition' or @type='translation' or @type='commentary']"
                "[starts-with(@n, 'urn:cts:')]",
                namespaces=TESTUnit.NS
            )
        elif self.guidelines == "2.tei":
            urns_holding_node = self.xml.xpath("//tei:text/tei:body[starts-with(@n, 'urn:cts:')]", namespaces=TESTUnit.NS) + \
                    self.xml.xpath("//tei:text[starts-with(@xml:base, 'urn:cts:')]", namespaces=TESTUnit.NS)

        if urns_holding_node:
            self.lang = urns_holding_node[0].get('{http://www.w3.org/XML/1998/namespace}lang')
        else:
            self.lang = ''

        if self.lang == '' or self.lang is None:
            self.lang = 'UNK'
            yield False
        else:
            yield True

    def test(self, scheme, guidelines, rng=None, inventory=None):
        """ Test a file with various checks

        When a test listed in ``breaks`` fails, every remaining test is reported as failed with
        empty logs and the run stops.

        :param scheme: Schema validation to run before the other tests ("tei", "epidoc", "auto_rng" or \
        "local_file"); any other value runs no schema validation
        :type scheme: str
        :param guidelines: Guidelines the file follows ("2.tei" and "2.epidoc" are the values checked by the tests)
        :type guidelines: str
        :param rng: Path to the RelaxNG file used by the "local_file" scheme
        :type rng: str
        :param inventory: URNs to be matched against
        :type inventory: list
        :returns: Iterator containing human readable test name, boolean status and logs
        :rtype: iterator(str, bool, list(str))
        """
        if inventory is not None:
            self.inv = inventory
        tests = list(CTSText_TestUnit.tests)
        if self.countwords:
            tests.append("count_words")

        if scheme in ["tei", "epidoc", "auto_rng", "local_file"]:
            tests = [scheme] + tests

        self.scheme = scheme
        self.guidelines = guidelines
        self.rng = rng
        if environ.get("HOOKTEST_DEBUG", False):
            print("Starting %s " % self.path)

        for index, test in enumerate(tests):
            if environ.get("HOOKTEST_DEBUG", False):
                print("\t Testing %s " % test)
            # The generator is fully consumed on purpose: tests keep logging after a first failure
            status = False not in list(getattr(self, test)())
            self.test_status[test] = status
            # Show the logs and return the status
            yield (CTSText_TestUnit.readable[test], status, self.logs)
            if test in self.breaks and not status:
                for skipped in tests[index+1:]:
                    self.test_status[skipped] = False
                    yield (CTSText_TestUnit.readable[skipped], False, [])
                break
            self.flush()