Esempio n. 1
0
def encoding_of_doc_matches(test_docx_filename, expected_json_patterns):
    """Check the encoded form of a test document against expected regexes.

    The document ``test_docx_filename`` (looked up under ``../test_data``) is
    encoded with ``gita_encode.encode_doc`` and dumped to a JSON string. Both
    that string and every pattern are passed through ``strip_whitespaces``
    before ``re.search`` is applied.

    Every pattern that fails to match is reported on stdout; the search does
    not stop at the first miss.

    Returns:
        True when all patterns were found, False otherwise.
    """
    docx_path = os.path.join('../test_data', test_docx_filename)
    encoded_doc = gita_encode.encode_doc(docx_path)
    compact_json = strip_whitespaces(json.dumps(encoded_doc))

    all_found = True
    for pattern in expected_json_patterns:
        compact_pattern = strip_whitespaces(pattern)
        if re.search(compact_pattern, compact_json) is not None:
            continue
        # Report the pattern as the caller wrote it, not the stripped form.
        print(f'--Regex not matched: {pattern}')
        all_found = False
    return all_found
Esempio n. 2
0
            del content_to_write["book-keep"]
            verses.append({
                "id": "*",
                "chapter": content["book-keep"]["chapterhead"],
                "shloka": content["book-keep"]["shlokahead"],
                "style": "shloka",
                "type": "text",
                "content": content_to_write
            })

    paras = docx_as_dict['paragraphs']
    content = blank_content()
    for i in range(len(paras)):
        content = form_presentable(paras, i, content)
        if content_boundary(paras, i):
            last_insight = extract_last_insight(content)
            add_to_verses(content)
            content = initial_content({"book-keep": {"lastinsight": last_insight}})
    return {"Verses": verses}


if __name__ == '__main__':
    def _save_json(payload, path, notice):
        # Dump payload to path as indented JSON, then print the notice.
        with open(path, 'w') as sink:
            json.dump(payload, sink, indent=2)
            print(notice)

    # The raw encoding is saved before verses are extracted, so paras.json
    # exists even if the extraction step fails.
    docx_as_dict = gita_encode.encode_doc('GitaBhashya-try.docx')
    _save_json(docx_as_dict, 'paras.json', 'Wrote docx to paras.json')

    verse_json = extract_verses(docx_as_dict)
    _save_json(verse_json, 'verse.json', "Wrote the verses to verse.json")
Esempio n. 3
0
import os
import json
import gita_encode

# Encode the sample chapter document and store the result as indented JSON
# in the current working directory.
sample_docx_path = os.path.join('../test_data', 'sample chapter.docx')
docx_as_dict = gita_encode.encode_doc(sample_docx_path)

with open('sample chapter.json', 'w') as json_out:
    json.dump(docx_as_dict, json_out, indent=2)
Esempio n. 4
0
def paras_from(docx_filename):
    """Return ``in_para_allcontent.paralist`` for an encoded test document.

    ``docx_filename`` is resolved relative to ``../test_data`` and encoded
    with ``gita_encode.encode_doc``; the encoding is handed unchanged to
    ``in_para_allcontent.paralist`` and its result is returned as is.
    """
    docx_path = os.path.join('../test_data', docx_filename)
    encoded_doc = gita_encode.encode_doc(docx_path)
    return in_para_allcontent.paralist(encoded_doc)