Esempio n. 1
0
def test_extract_example():
    """Check the example text extracted from the parsed feefilter doc file."""
    doc_parser = DocParser()
    doc_text = Path("doc_data/feefilter.txt").read_text()
    parsed_doc = doc_parser.parse(doc_text)

    extracted = DocTester("test_data").extract_example(parsed_doc)

    expected = "7cbd000000000000 ... satoshis per kilobyte: 48,508\n"
    assert extracted == expected
Esempio n. 2
0
 def test_example(self, path, object_name):
     """Test the example found in a single doc file.

     The object name is the file name of `path` with its last four
     characters removed (presumably the ".txt" extension -- confirm against
     the callers).

     Returns None without testing anything when the file is the "intro"
     file, when `object_name` is given and differs from the file's object
     name, or when no example can be extracted from the file. Otherwise
     returns the result of `self.test_object(name, example)`.
     """
     name = path.name[:-4]
     if name == "intro":
         return None
     # Filter on the requested object before touching the file, so files
     # that were not asked for are neither read nor parsed.
     if object_name and name != object_name:
         return None
     with path.open() as file:
         doc_data = DocParser().parse(file.read())
     example = self.extract_example(doc_data)
     if not example:
         return None
     return self.test_object(name, example)
Esempio n. 3
0
def test_parse_args():
    """Check how `parse_args` splits a comma-separated argument string."""
    parser = DocParser()
    # (raw argument string, expected list of arguments)
    cases = [
        ("", []),
        (" ", []),
        ("one", ["one"]),
        ("one,two", ["one", "two"]),
        ("one, two two", ["one", "two two"]),
        (" one, two ", ["one", "two"]),
    ]
    for raw_args, expected in cases:
        assert parser.parse_args(raw_args) == expected
Esempio n. 4
0
 def render_file(self, output_dir, name, message=None, title=True):
     """Create RST file in the given output directory for the message with
     the given name. Optionally takes a message object as returned by the
     CodeParser. This is rendered together with the information from the
     file in the `doc_data` directory which corresponds to the message with
     the given name.

     If `title` is true, a title line with a dashed underline is written;
     a leading "types/" is stripped from `name` for the title. If `message`
     is given, its "text" entry is written before the rendered doc data.

     The complete content is rendered before the output file is opened, so
     a missing doc data file or an error while parsing or rendering does
     not leave an empty or partially written RST file behind.
     """
     data_path = Path(__file__).parent / "doc_data" / (name + ".txt")
     with data_path.open() as data_file:
         doc_data = data_file.read()
     sections = DocParser().parse(doc_data)
     doc_processor = DocProcessor()
     doc_processor.message = name
     processed_sections = doc_processor.process(sections)

     # The render_* calls are kept in their original order.
     parts = [self.render_copyright_header(processed_sections)]
     if title:
         match = re.match("types/(.*)", name)
         title_str = match.group(1) if match else name
         parts.append(title_str + "\n")
         parts.append("-" * len(title_str) + "\n\n")
     if message:
         parts.append(message["text"])
         # Separate the message text from the doc data: a newline when the
         # doc data starts with one, otherwise a single space.
         parts.append("\n" if doc_data.startswith("\n") else " ")
     parts.append(self.render_doc(name, processed_sections))
     parts.append(self.render_link_targets())
     parts.append(self.render_source_footer(processed_sections))

     filename = output_dir / (name + ".rst")
     with filename.open("w") as file:
         file.write("".join(parts))
Esempio n. 5
0
def test_parse_table():
    """Check that a table block is parsed into one dict per row, keyed by column."""
    doc_data = r"""\starttable One, Column Two, Three
row1 1
row1 2
row1 3

row2 1
row2 2
row2 3
\endtable
"""
    columns = ["One", "Column Two", "Three"]
    expected_rows = [
        dict(zip(columns, ("row1 1", "row1 2", "row1 3"))),
        dict(zip(columns, ("row2 1", "row2 2", "row2 3"))),
    ]

    table = DocParser().parse(doc_data)[0]

    assert table["type"] == "table"
    assert table["args"] == columns
    assert table["data"] == expected_rows
Esempio n. 6
0
def test_render_doc_example3():
    """Render the vector type doc plus link targets and compare with the expected RST."""
    renderer = RendererRST()
    with open("test_data/doc_data/types/vector.txt") as doc_file:
        sections = DocParser().parse(doc_file.read())
        processed = DocProcessor().process(sections)
        body = renderer.render_doc("types/vector", processed)
    actual = body + renderer.render_link_targets()
    with open("test_data/expected/types/vector.rst") as expected_file:
        expected = expected_file.read()
    assert actual == expected
Esempio n. 7
0
def test_parse_format():
    """Check that text and format blocks are parsed into alternating sections."""
    doc_data = """one line
another line

\\startformat
name1
type1
description1

name2
type2
description2
\\endformat

\\startformat
name3
type3
description3
\\endformat

some more text
"""

    # (section type, section data) in the order the sections appear.
    expected_sections = [
        ("text", "one line\nanother line\n\n"),
        (
            "format",
            [
                {
                    "Name": "name1",
                    "Data Type": "type1",
                    "Description": "description1",
                },
                {
                    "Name": "name2",
                    "Data Type": "type2",
                    "Description": "description2",
                },
            ],
        ),
        ("text", "\n"),
        (
            "format",
            [
                {
                    "Name": "name3",
                    "Data Type": "type3",
                    "Description": "description3",
                },
            ],
        ),
        ("text", "\nsome more text\n"),
    ]

    sections = DocParser().parse(doc_data)

    assert len(sections) == 5
    for index, (section_type, section_data) in enumerate(expected_sections):
        assert sections[index]["type"] == section_type
        assert sections[index]["data"] == section_data
Esempio n. 8
0
def test_parse_example():
    """Check parsing of an example block, with and without arguments."""
    with_args = r"""\startexample nointro
xyz
\endexample
"""
    without_args = r"""\startexample
xyz
\endexample
"""
    variants = ((with_args, ["nointro"]), (without_args, []))
    for doc_data, expected_args in variants:
        example = DocParser().parse(doc_data)[0]

        assert example["type"] == "example"
        assert example["args"] == expected_args
        assert example["data"] == "xyz\n"
def main():
    """Embed the abstract of every parsed document and pickle the results.

    Walks the directory tree under `grobid_path`, parses each file with
    `DocParser.parseXML`, and appends every document that has a non-empty
    abstract to 'grobid_data.pkl' as a separate pickle record. The number
    of files in each directory and the index of each file are printed as
    progress output.
    """
    embedder = Embedder()
    parser = DocParser()
    with open('grobid_data.pkl', 'wb') as output:
        for subdir, _dirs, filenames in os.walk(grobid_path):
            print(len(filenames))
            for index, filename in enumerate(filenames):
                print(index)
                doc = parser.parseXML(os.path.join(subdir, filename))
                # The id is the file name up to its first dot.
                doc.id = str(filename).split('.')[0]
                # Documents without an abstract are not embedded or stored.
                if len(doc.abstract) != 0:
                    doc.embedding = embedder.embed(doc.abstract)
                    pickle.dump(doc, output, pickle.HIGHEST_PROTOCOL)
Esempio n. 10
0
def test_render_doc_example1():
    """Render the example doc file and compare it with the expected RST output."""
    with open("test_data/doc_data/example.txt") as doc_file:
        sections = DocParser().parse(doc_file.read())
        processed = DocProcessor().process(sections)
        actual = RendererRST().render_doc("example", processed)
    with open("test_data/expected/example.rst") as expected_file:
        expected = expected_file.read()
    assert actual == expected
Esempio n. 11
0
def test_parse_error():
    """Check that parsing input with an opened but never closed format block
    raises `DocParser.Error`.
    """
    # NOTE(review): this is a raw string, so the trailing `\n` is a backslash
    # followed by "n" rather than a newline -- confirm this is intended.
    unterminated = r"\startformat\n"

    with pytest.raises(DocParser.Error):
        DocParser().parse(unterminated)
Esempio n. 12
0
def test_parse_todo():
    """Check that a todo command keeps its text as a single string argument
    and carries no data.
    """
    todo = DocParser().parse(r"\todo some thing")[0]

    assert todo["type"] == "todo"
    assert todo["args"] == "some thing"
    assert "data" not in todo
Esempio n. 13
0
def test_parse_copyright():
    """Check that a copyright command is parsed into a list of arguments
    and carries no data.
    """
    notice = DocParser().parse(r"\copyright bitcoin, unit-e")[0]

    assert notice["type"] == "copyright"
    assert notice["args"] == ["bitcoin", "unit-e"]
    assert "data" not in notice
Esempio n. 14
0
def test_parse_figure():
    """Check that a figure command is parsed into path and title arguments
    and carries no data.
    """
    figure = DocParser().parse(r"\figure /some/path, Some title")[0]

    assert figure["type"] == "figure"
    assert figure["args"] == ["/some/path", "Some title"]
    assert "data" not in figure