def testCoreToken_ProcessHeaders(mockGUI):
    """Test the header and page parser of the Tokenizer class.

    NOTE(review): The sub-tests below are order-dependent. The tokenizer
    keeps internal state between calls (first-page flag, chapter counter,
    scene counters), so e.g. the "Chapter II"/"Chapter iii" and
    "Scene 3.2" expectations only hold in this exact sequence.
    """
    theProject = NWProject(mockGUI)
    theProject.projLang = "en"
    theProject._loadProjectLocalisation()
    theToken = BareTokenizer(theProject)

    # Nothing: headers are only processed for novel documents, so with no
    # layout flag set (or only the none/note flags toggled) doHeaders()
    # must always report False
    theToken._theText = "Some text ...\n"
    assert theToken.doHeaders() is False
    theToken._isNone = True
    assert theToken.doHeaders() is False
    theToken._isNone = False
    assert theToken.doHeaders() is False
    theToken._isNote = True
    assert theToken.doHeaders() is False
    theToken._isNote = False

    ##
    #  Story Files
    ##

    theToken._isNone = False
    theToken._isNote = False
    theToken._isNovel = True

    # Titles
    # ======

    # H1: Title, First Page
    # The first title page gets no page break before it
    assert theToken._isFirst is True
    theToken._theText = "# Part One\n"
    theToken.setTitleFormat(r"T: %title%")
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_HEAD1, 1, "T: Part One", None, Tokenizer.A_CENTRE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H1: Title, Not First Page
    # Subsequent titles also get a page break before (A_PBB)
    assert theToken._isFirst is False
    theToken._theText = "# Part One\n"
    theToken.setTitleFormat(r"T: %title%")
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_HEAD1, 1, "T: Part One", None, Tokenizer.A_PBB | Tokenizer.A_CENTRE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # Chapters
    # ========

    # H2: Chapter
    theToken._theText = "## Chapter One\n"
    theToken.setChapterFormat(r"C: %title%")
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_HEAD2, 1, "C: Chapter One", None, Tokenizer.A_PBB),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H2: Unnumbered Chapter ("##!" heading prefix)
    theToken._theText = "##! Prologue\n"
    theToken.setUnNumberedFormat(r"U: %title%")
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_UNNUM, 1, "U: Prologue", None, Tokenizer.A_PBB),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H2: Chapter Word Number
    # Reset the chapter counter so %chw% formats as "One"
    theToken._theText = "## Chapter\n"
    theToken.setChapterFormat(r"Chapter %chw%")
    theToken._numChapter = 0
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_HEAD2, 1, "Chapter One", None, Tokenizer.A_PBB),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H2: Chapter Roman Number Upper Case
    # Counter was not reset, so this is chapter 2 -> "II"
    theToken._theText = "## Chapter\n"
    theToken.setChapterFormat(r"Chapter %chI%")
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_HEAD2, 1, "Chapter II", None, Tokenizer.A_PBB),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H2: Chapter Roman Number Lower Case
    # Chapter 3 -> "iii"
    theToken._theText = "## Chapter\n"
    theToken.setChapterFormat(r"Chapter %chi%")
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_HEAD2, 1, "Chapter iii", None, Tokenizer.A_PBB),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # Scenes
    # ======

    # H3: Scene w/Title
    theToken._theText = "### Scene One\n"
    theToken.setSceneFormat(r"S: %title%", False)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_HEAD3, 1, "S: Scene One", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene Hidden wo/Format
    # Hidden scenes drop the heading entirely
    theToken._theText = "### Scene One\n"
    theToken.setSceneFormat(r"", True)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene wo/Format, first
    theToken._theText = "### Scene One\n"
    theToken.setSceneFormat(r"", False)
    theToken._firstScene = True
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene wo/Format, not first
    # Later empty-format scenes leave a skip marker instead
    theToken._theText = "### Scene One\n"
    theToken.setSceneFormat(r"", False)
    theToken._firstScene = False
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_SKIP, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene Separator, first
    # The separator is suppressed on the first scene
    theToken._theText = "### Scene One\n"
    theToken.setSceneFormat(r"* * *", False)
    theToken._firstScene = True
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene Separator, not first
    theToken._theText = "### Scene One\n"
    theToken.setSceneFormat(r"* * *", False)
    theToken._firstScene = False
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_SEP, 1, "* * *", None, Tokenizer.A_CENTRE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene w/Absolute Number (%sca% counts across chapters)
    theToken._theText = "### A Scene\n"
    theToken.setSceneFormat(r"Scene %sca%", False)
    theToken._numAbsScene = 0
    theToken._numChScene = 0
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_HEAD3, 1, "Scene 1", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H3: Scene w/Chapter Number
    # Chapter counter is still 3 from the chapter tests above, and the
    # per-chapter scene counter is pre-set to 1 -> "Scene 3.2"
    theToken._theText = "### A Scene\n"
    theToken.setSceneFormat(r"Scene %ch%.%sc%", False)
    theToken._numAbsScene = 0
    theToken._numChScene = 1
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_HEAD3, 1, "Scene 3.2", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # Sections
    # ========

    # H4: Section Hidden wo/Format
    theToken._theText = "#### A Section\n"
    theToken.setSectionFormat(r"", True)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H4: Section Visible wo/Format
    theToken._theText = "#### A Section\n"
    theToken.setSectionFormat(r"", False)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_SKIP, 1, "", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H4: Section w/Format
    theToken._theText = "#### A Section\n"
    theToken.setSectionFormat(r"X: %title%", False)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_HEAD4, 1, "X: A Section", None, Tokenizer.A_NONE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # H4: Section Separator
    theToken._theText = "#### A Section\n"
    theToken.setSectionFormat(r"* * *", False)
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._theTokens == [
        (Tokenizer.T_SEP, 1, "* * *", None, Tokenizer.A_CENTRE),
        (Tokenizer.T_EMPTY, 1, "", None, Tokenizer.A_NONE),
    ]

    # Check the first scene detector: any non-heading text clears the flag
    assert theToken._firstScene is False
    theToken._firstScene = True
    theToken._theText = "Some text ...\n"
    theToken.tokenizeText()
    theToken.doHeaders()
    assert theToken._firstScene is False
def testCoreToken_TextOps(monkeypatch, nwMinimal, mockGUI):
    """Test handling files and text in the Tokenizer class.

    Uses the nwMinimal sample project on disk; sHandle and the root
    handle "7695ce551d265" refer to documents in that fixture project.
    NOTE(review): The sub-tests are order-dependent — the second
    addRootHeading() call expects a page break because a page was
    already added by the first.
    """
    theProject = NWProject(mockGUI)
    theProject.projTree.setSeed(42)
    theProject.projLang = "en"
    theProject._loadProjectLocalisation()
    theToken = BareTokenizer(theProject)
    theToken.setKeepMarkdown(True)
    assert theProject.openProject(nwMinimal)
    sHandle = "8c659a11cd429"

    # Set some content to work with
    docText = (
        "### Scene Six\n\n"
        "This is text with _italic text_, some **bold text**, some ~~deleted text~~, "
        "and some **_mixed text_** and **some _nested_ text**.\n\n"
        "#### Replace\n\n"
        "Also, replace <A> and <B>.\n\n"
    )
    # Expected result after auto-replace has substituted <A> and <B>
    docTextR = docText.replace("<A>", "this").replace("<B>", "that")

    nDoc = NWDoc(theProject, sHandle)
    assert nDoc.writeDocument(docText)

    theProject.setAutoReplace({"A": "this", "B": "that"})
    assert theProject.saveProject()

    # Root Heading: invalid handle and a non-root document are rejected
    assert theToken.addRootHeading("stuff") is False
    assert theToken.addRootHeading(sHandle) is False

    # First Page: no page break on the first root heading
    assert theToken.addRootHeading("7695ce551d265") is True
    assert theToken.theMarkdown[-1] == "# Notes: Plot\n\n"
    assert theToken._theTokens[-1] == (
        Tokenizer.T_TITLE, 0, "Notes: Plot", None, Tokenizer.A_CENTRE
    )

    # Not First Page: subsequent root headings add a page break (A_PBB)
    assert theToken.addRootHeading("7695ce551d265") is True
    assert theToken.theMarkdown[-1] == "# Notes: Plot\n\n"
    assert theToken._theTokens[-1] == (
        Tokenizer.T_TITLE, 0, "Notes: Plot", None, Tokenizer.A_CENTRE | Tokenizer.A_PBB
    )

    # Set Text: invalid handle fails; valid handle loads from disk
    assert theToken.setText("stuff") is False
    assert theToken.setText(sHandle) is True
    assert theToken._theText == docText

    # With MAX_DOCSIZE forced down to 100 bytes, the document is replaced
    # by an inline error message instead of its content
    with monkeypatch.context() as mp:
        mp.setattr("novelwriter.constants.nwConst.MAX_DOCSIZE", 100)
        assert theToken.setText(sHandle, docText) is True
        assert theToken._theText == (
            "# ERROR\n\n"
            "Document 'New Scene' is too big (0.00 MB). Skipping.\n\n"
        )

    assert theToken.setText(sHandle, docText) is True
    assert theToken._theText == docText

    # Layout flags picked up from the document
    assert theToken._isNone is False
    assert theToken._isNovel is True
    assert theToken._isNote is False

    # Pre Processing: applies the auto-replace dictionary
    theToken.doPreProcessing()
    assert theToken._theText == docTextR

    # Post Processing: strips the markdown escape backslashes
    theToken._theResult = r"This is text with escapes: \** \~~ \__"
    theToken.doPostProcessing()
    assert theToken.theResult == "This is text with escapes: ** ~~ __"

    # Save File: raw markdown contains the two root headings added above
    savePath = os.path.join(nwMinimal, "dump.nwd")
    theToken.saveRawMarkdown(savePath)
    assert readFile(savePath) == (
        "# Notes: Plot\n\n"
        "# Notes: Plot\n\n"
    )

    # Check abstract method: the bare tokenizer does not implement doConvert
    with pytest.raises(NotImplementedError):
        theToken.doConvert()