def test_extract_example():
    """Check that DocTester extracts the example text from the parsed feefilter doc."""
    parser = DocParser()
    raw_doc = Path("doc_data/feefilter.txt").read_text()
    sections = parser.parse(raw_doc)

    extracted = DocTester("test_data").extract_example(sections)

    expected = "7cbd000000000000 ... satoshis per kilobyte: 48,508\n"
    assert extracted == expected
def test_example(self, path, object_name):
    """Run the example embedded in the doc file at `path` through `test_object`.

    The object name is the file name minus its last four characters (the
    extension). Returns None without testing when the file is the "intro"
    page, when the doc contains no example, or when `object_name` is given
    and does not match this file's object name. Otherwise returns whatever
    `self.test_object` returns.
    """
    stem = path.name[:-4]
    if stem == "intro":
        return None

    parser = DocParser()
    with path.open() as handle:
        raw_doc = handle.read()
    sections = parser.parse(raw_doc)

    example = self.extract_example(sections)
    # The doc is parsed even for objects that end up filtered out by name.
    if not example or (object_name and stem != object_name):
        return None
    return self.test_object(stem, example)
def test_parse_args():
    """Exercise DocParser.parse_args with empty, single and comma-separated input."""
    cases = [
        ("", []),
        (" ", []),
        ("one", ["one"]),
        ("one,two", ["one", "two"]),
        ("one, two two", ["one", "two two"]),
        (" one, two ", ["one", "two"]),
    ]
    parser = DocParser()
    for raw, expected in cases:
        assert parser.parse_args(raw) == expected
def render_file(self, output_dir, name, message=None, title=True):
    """Create the RST file for the message with the given name.

    The output is written to `<output_dir>/<name>.rst`. Its content combines
    the optional `message` object (as returned by the CodeParser) with the
    documentation read from `doc_data/<name>.txt` next to this module.

    Written in order: the copyright header, an optional title underlined
    with dashes, the message text (if a message is given), the rendered
    doc sections, the link targets and the source footer.

    Args:
        output_dir: Directory to write into; must support the `/` operator
            (presumably a `pathlib.Path` - confirm against callers).
        name: Message name; also selects the `doc_data` input file. For
            names starting with "types/" only the remainder is used as title.
        message: Optional mapping whose "text" entry is written before the
            rendered documentation.
        title: Whether to write the title block.
    """
    filename = output_dir / (name + ".rst")
    # The output file is opened (and truncated) before the doc_data input is read.
    with filename.open("w") as file:
        with (Path(__file__).parent / "doc_data" / (name + ".txt")).open() as data_file:
            doc_data = data_file.read()
        sections = DocParser().parse(doc_data)
        doc_processor = DocProcessor()
        # The processor is told which message it is working on.
        doc_processor.message = name
        processed_sections = doc_processor.process(sections)
        file.write(self.render_copyright_header(processed_sections))
        if title:
            # Strip a leading "types/" so only the type's own name is the title.
            match = re.match("types/(.*)", name)
            if match:
                title_str = match.group(1)
            else:
                title_str = name
            file.write(title_str + "\n")
            file.write("-" * len(title_str) + "\n\n")
        if message:
            file.write(message["text"])
            # NOTE(review): the separator is assumed to be written only after a
            # message text - a space when the doc text continues directly, a
            # newline when the doc text starts with one. Confirm this nesting.
            if not doc_data.startswith("\n"):
                file.write(" ")
            else:
                file.write("\n")
        file.write(self.render_doc(name, processed_sections))
        file.write(self.render_link_targets())
        file.write(self.render_source_footer(processed_sections))
def test_parse_table():
    """A starttable/endtable block parses into one "table" section with keyed rows."""
    # NOTE(review): the fixture literal is reproduced exactly as it appears in
    # the source under review; the expected rows suggest the parser input is
    # line-oriented - confirm the literal's line breaks.
    source = r"""\starttable One, Column Two, Three row1 1 row1 2 row1 3 row2 1 row2 2 row2 3 \endtable """
    expected_rows = [
        {"One": "row1 1", "Column Two": "row1 2", "Three": "row1 3"},
        {"One": "row2 1", "Column Two": "row2 2", "Three": "row2 3"},
    ]

    table = DocParser().parse(source)[0]

    assert table["type"] == "table"
    assert table["args"] == ["One", "Column Two", "Three"]
    assert table["data"] == expected_rows
def test_render_doc_example3():
    """Rendering the vector type doc plus link targets must match the expected RST."""
    renderer = RendererRST()
    with open("test_data/doc_data/types/vector.txt") as source_file:
        processor = DocProcessor()
        parser = DocParser()
        sections = processor.process(parser.parse(source_file.read()))

    body = renderer.render_doc("types/vector", sections)
    output = body + renderer.render_link_targets()

    with open("test_data/expected/types/vector.rst") as expected_file:
        expected = expected_file.read()
    assert output == expected
def test_parse_format():
    """Text and startformat/endformat blocks parse into alternating sections."""

    def row(name, data_type, description):
        # One parsed format entry, keyed the way the parser reports it.
        return {"Name": name, "Data Type": data_type, "Description": description}

    # NOTE(review): the fixture literal is reproduced exactly as it appears in
    # the source under review, yet the first expected text section contains
    # newlines - confirm the literal's line breaks.
    source = """one line another line \\startformat name1 type1 description1 name2 type2 description2 \\endformat \\startformat name3 type3 description3 \\endformat some more text """
    expected = [
        ("text", "one line\nanother line\n\n"),
        (
            "format",
            [
                row("name1", "type1", "description1"),
                row("name2", "type2", "description2"),
            ],
        ),
        ("text", "\n"),
        ("format", [row("name3", "type3", "description3")]),
        ("text", "\nsome more text\n"),
    ]

    sections = DocParser().parse(source)

    assert len(sections) == 5
    for section, (kind, data) in zip(sections, expected):
        assert section["type"] == kind
        assert section["data"] == data
def test_parse_example():
    """An example block parses to an "example" section, with and without arguments."""
    # NOTE(review): the fixture literals are reproduced exactly as they appear
    # in the source under review; the expected data ends with a newline -
    # confirm the literals' line breaks.
    cases = [
        (r"""\startexample nointro xyz \endexample """, ["nointro"]),
        (r"""\startexample xyz \endexample """, []),
    ]
    for source, expected_args in cases:
        example = DocParser().parse(source)[0]
        assert example["type"] == "example"
        assert example["args"] == expected_args
        assert example["data"] == "xyz\n"
def main():
    """Parse every file under `grobid_path` and pickle the documents that have an abstract.

    Each file is parsed with `DocParser.parseXML`, given an id taken from the
    file name up to its first dot, and - when its abstract is non-empty -
    embedded and appended to `grobid_data.pkl`. Documents are dumped one
    after another into the same file, so reading them back takes repeated
    `pickle.load` calls. Progress (file count per directory, then the index
    of each file) is printed to stdout.
    """
    embedder = Embedder()
    parser = DocParser()

    with open('grobid_data.pkl', 'wb') as output:
        for subdir, _dirs, files in os.walk(grobid_path):
            print(len(files))
            for index, filename in enumerate(files):
                print(index)
                doc = parser.parseXML(os.path.join(subdir, filename))
                doc.id = str(filename).partition('.')[0]
                # Documents without an abstract are neither embedded nor stored.
                if len(doc.abstract) != 0:
                    doc.embedding = embedder.embed(doc.abstract)
                    pickle.dump(doc, output, pickle.HIGHEST_PROTOCOL)
def test_render_doc_example1():
    """Rendering the example doc must reproduce the expected RST file."""
    with open("test_data/doc_data/example.txt") as source_file:
        processor = DocProcessor()
        parser = DocParser()
        sections = processor.process(parser.parse(source_file.read()))

    output = RendererRST().render_doc("example", sections)

    with open("test_data/expected/example.rst") as expected_file:
        expected = expected_file.read()
    assert output == expected
def test_parse_error():
    """Parsing input that holds only a startformat marker must raise DocParser.Error."""
    # Raw string: the trailing backslash-n is two literal characters, not a newline.
    malformed = r"\startformat\n"
    with pytest.raises(DocParser.Error):
        DocParser().parse(malformed)
def test_parse_todo():
    """A todo line parses to a "todo" section whose args is the raw text and which has no data."""
    todo = DocParser().parse(r"\todo some thing")[0]

    assert todo["type"] == "todo"
    assert todo["args"] == "some thing"
    assert "data" not in todo
def test_parse_copyright():
    """A copyright line parses to a "copyright" section with a list of holders and no data."""
    notice = DocParser().parse(r"\copyright bitcoin, unit-e")[0]

    assert notice["type"] == "copyright"
    assert notice["args"] == ["bitcoin", "unit-e"]
    assert "data" not in notice
def test_parse_figure():
    """A figure line parses to a "figure" section with path and title args and no data."""
    figure = DocParser().parse(r"\figure /some/path, Some title")[0]

    assert figure["type"] == "figure"
    assert figure["args"] == ["/some/path", "Some title"]
    assert "data" not in figure