def downloadTestamentAlignments(userUrl, bibleType, newTestament, outputBasePath):
    """Download the alignments of every book in one testament.

    The output base folder is created first (via ``file.ensureFolderExists``),
    then each book id returned by ``getBookList(newTestament)`` is announced
    on stdout and handed to ``downloadBookAlignments``.

    Args:
        userUrl: forwarded unchanged to ``downloadBookAlignments``.
        bibleType: forwarded unchanged to ``downloadBookAlignments``.
        newTestament: passed to ``getBookList`` to select the book list.
        outputBasePath: folder that receives the downloaded data.
    """
    file.ensureFolderExists(outputBasePath)
    for currentBook in getBookList(newTestament):
        progressMessage = 'downloadTestamentAlignments downloading book ' + currentBook
        print(progressMessage)
        downloadBookAlignments(userUrl, bibleType, currentBook, outputBasePath)
def downloadBookAlignments(userUrl, bibleType, bookId, outputBasePath):
    """Download one book's USFM file and convert it to JSON.

    The download URL and the repository name are obtained from
    ``file.getBookUrl`` and ``file.getRepoName``. The USFM file is stored as
    ``<outputBasePath>/<repoName>/<repoName>.usfm`` and the conversion output
    goes to ``<outputBasePath>/<bookId>``. If the USFM file is already
    present, nothing is downloaded or converted.

    Errors raised while downloading or converting are reported on stdout and
    the function returns normally. ``KeyboardInterrupt`` and ``SystemExit``
    are not intercepted, so the user can still abort a long run.

    Args:
        userUrl: base URL of the user's repositories, passed to
            ``file.getBookUrl``.
        bibleType: bible type identifier, passed to ``file.getBookUrl`` and
            ``file.getRepoName``.
        bookId: identifier of the book to fetch.
        outputBasePath: folder under which the per-book folders are created.
    """
    # Example of a book URL:
    # https://git.door43.org/lrsallee/en_ult_act_book/raw/branch/master/en_ult_act_book.usfm
    url = file.getBookUrl(userUrl, bibleType, bookId)
    repoName = file.getRepoName(bibleType, bookId)
    outputFolder = outputBasePath + '/' + repoName
    file.ensureFolderExists(outputFolder)
    usfmDestPath = outputFolder + '/' + repoName + '.usfm'
    jsonOutput = outputBasePath + '/' + bookId

    if os.path.isfile(usfmDestPath):
        print('file already exists, skipping ' + usfmDestPath)
    else:
        try:
            file.downloadFile(url, usfmDestPath)
            file.ensureFolderExists(jsonOutput)
            system.convertUsfmToJson(usfmDestPath, jsonOutput)
        except Exception:
            # Catch Exception rather than everything: a bare ``except`` would
            # also swallow Ctrl-C / SystemExit. Note that a failure in the
            # conversion step is reported with this same message.
            print(f'download of {url} failed')

    # Runs on every path: after a failed download the folder created above
    # may still be empty.
    file.removeEmptyFolder(outputFolder)  # don't leave empty folders behind
def getConfig():
    """Build the settings dictionary for target language "en", Bible "ult".

    Besides assembling the dictionary, this creates the resource folder and
    the three data folders through ``file.ensureFolderExists``.

    Returns:
        dict: settings keyed by name - paths, resource URLs, resource
        versions, identifiers and processing flags.
    """
    newTestament = True

    # Target-language resources.
    targetLang = "en"
    targetBibleId = "ult"
    targetLangBibleVersion = '18'
    targetLangTWordsVersion = '19'

    # Original-language resources (Greek and Hebrew).
    origLangIdGreek = 'el-x-koine'
    origLangIdHebrew = "hbo"
    origLangBibleIdGreek = 'ugnt'
    origLangBibleIdHebrew = "uhb"
    origLangVersionGreek = '0.16'
    origLangVersionHebrew = '2.1.16'

    cdnUrl = 'https://cdn.door43.org'
    resourceBasePath = './resources'
    baseDataPath = f'./data/{targetLang}/{targetBibleId}'
    tWordsDataFolder = f'{baseDataPath}/tWords'
    trainingDataPath = f'{baseDataPath}/TrainingData'

    def biblePath(langId, bibleId, version):
        # Folder of one Bible version below the resource folder.
        return f'{resourceBasePath}/{langId}/bibles/{bibleId}/v{version}'

    def tWordsPath(langId, version):
        # Folder of one translationWords version below the resource folder.
        return f'{resourceBasePath}/{langId}/translationHelps/translationWords/v{version}'

    origLangPathGreek = biblePath(origLangIdGreek, origLangBibleIdGreek, origLangVersionGreek)
    origLangPathHebrew = biblePath(origLangIdHebrew, origLangBibleIdHebrew, origLangVersionHebrew)
    greekLexiconPath = f'{home}/translationCore/resources/{targetLang}/lexicons/ugl/v0/content'

    # Everything that depends on the testament is selected in one place.
    if newTestament:
        testamentStr = "NT"
        origLangId = origLangIdGreek
        origLangBibleId = origLangBibleIdGreek
        origLangVersion = origLangVersionGreek
        origLangPath = origLangPathGreek
    else:
        testamentStr = "OT"
        origLangId = origLangIdHebrew
        origLangBibleId = origLangBibleIdHebrew
        origLangVersion = origLangVersionHebrew
        origLangPath = origLangPathHebrew

    for folder in (resourceBasePath, baseDataPath, tWordsDataFolder, trainingDataPath):
        file.ensureFolderExists(folder)

    return {
        'newTestament': newTestament,
        'testamentStr': testamentStr,
        'targetBibleType': f'{targetLang}_{targetBibleId}',
        'resourceBasePath': resourceBasePath,
        'baseDataPath': baseDataPath,
        'origLangPathGreek': origLangPathGreek,
        'origLangPathHebrew': origLangPathHebrew,
        'targetLanguagePath': biblePath(targetLang, targetBibleId, targetLangBibleVersion),
        'dbPath': f'{baseDataPath}/alignments_{testamentStr}.sqlite',
        'targetLang': targetLang,
        'targetBibleId': targetBibleId,
        'tWordsTargetPath': tWordsPath(targetLang, targetLangTWordsVersion),
        'tWordsTypeList': ['kt', 'names', 'other'],  # categories of tWords
        'tWordsGreekPath': tWordsPath(origLangIdGreek, origLangVersionGreek),
        'tWordsDataFolder': tWordsDataFolder,
        'greekLexiconPath': greekLexiconPath,
        'trainingDataPath': trainingDataPath,
        'origLangResourceUrl': cdnUrl,
        'targetBibleLangResourceUrl': cdnUrl,
        'targetTWordsLangResourceUrl': cdnUrl,
        'baseLangResourceUrl': cdnUrl,
        'origLangId': origLangId,
        'origLangBibleId': origLangBibleId,
        'origLangVersionGreek': origLangVersionGreek,
        'origLangVersionHebrew': origLangVersionHebrew,
        'origLangVersion': origLangVersion,
        'targetLangBibleVersion': targetLangBibleVersion,
        'targetLangTWordsVersion': targetLangTWordsVersion,
        'tWordsId': "tw",
        'tWordsResourceName': 'bible',
        'projectsUrl': 'https://git.door43.org/lrsallee',
        'origLangPath': origLangPath,
        'processAllAlignments': True,
        'processTWordsAlignments': True
    }
def getConfig():
    """Build the settings dictionary for target language "ru", Bible "rlob".

    Besides assembling the dictionary, this creates the resource folder and
    the three data folders through ``file.ensureFolderExists``.

    Returns:
        dict: settings keyed by name - paths, resource URLs, resource
        versions, identifiers, processing flags and alignment thresholds.
    """
    newTestament = True
    testamentStr = "NT" if newTestament else "OT"

    # One descriptor per original language; the active one follows the testament.
    greek = {'id': 'el-x-koine', 'bible': 'ugnt', 'version': '0.16'}
    hebrew = {'id': "hbo", 'bible': "uhb", 'version': '2.1.16'}
    active = greek if newTestament else hebrew

    targetLang = "ru"
    targetBibleId = "rlob"
    targetLangBibleVersion = '1'
    targetLangTWordsVersion = '3'

    resourceBasePath = './resources'
    baseDataPath = f'./data/{targetLang}/{targetBibleId}'
    tWordsDataFolder = f'{baseDataPath}/tWords'
    trainingDataPath = f'{baseDataPath}/TrainingData'

    def origBiblePath(lang):
        # Folder of an original-language Bible below the resource folder.
        return f"{resourceBasePath}/{lang['id']}/bibles/{lang['bible']}/v{lang['version']}"

    def tWordsPath(langId, version):
        # Folder of one translationWords version below the resource folder.
        return f'{resourceBasePath}/{langId}/translationHelps/translationWords/v{version}'

    greekLexiconPath = f'{home}/translationCore/resources/{targetLang}/lexicons/ugl/v0/content'

    cfg = {
        'newTestament': newTestament,
        'testamentStr': testamentStr,
        'targetBibleType': f'{targetLang}_{targetBibleId}',
        'resourceBasePath': resourceBasePath,
        'baseDataPath': baseDataPath,
        'origLangPathGreek': origBiblePath(greek),
        'origLangPathHebrew': origBiblePath(hebrew),
        'targetLanguagePath': f'{resourceBasePath}/{targetLang}/bibles/{targetBibleId}/v{targetLangBibleVersion}',
        'dbPath': f'{baseDataPath}/alignments_{testamentStr}.sqlite',
        'targetLang': targetLang,
        'targetBibleId': targetBibleId,
        'tWordsTargetPath': tWordsPath(targetLang, targetLangTWordsVersion),
        'tWordsTypeList': ['kt', 'names', 'other'],  # categories of tWords
        'tWordsGreekPath': tWordsPath(greek['id'], greek['version']),
        'tWordsDataFolder': tWordsDataFolder,
        'greekLexiconPath': greekLexiconPath,
        'trainingDataPath': trainingDataPath,
        'origLangResourceUrl': 'https://git.door43.org/unfoldingWord/el-x-koine_ugnt/archive/master.zip',
        'targetBibleLangResourceUrl': 'https://git.door43.org/ru_gl/ru_rlob/archive/master.zip',
        'targetTWordsLangResourceUrl': 'https://git.door43.org/ru_gl/ru_tw/archive/master.zip',
        'baseLangResourceUrl': 'https://cdn.door43.org',
        'origLangId': active['id'],
        'origLangBibleId': active['bible'],
        'origLangVersionGreek': greek['version'],
        'origLangVersionHebrew': hebrew['version'],
        'origLangVersion': active['version'],
        'targetLangBibleVersion': targetLangBibleVersion,
        'targetLangTWordsVersion': targetLangTWordsVersion,
        'tWordsId': "tw",
        'tWordsResourceName': 'bible',
        'projectsUrl': 'https://git.door43.org/lrsallee',
        'origLangPath': origBiblePath(active),
        'tWordsUseEnUlt': True,
        'processAllAlignments': True,
        'processTWordsAlignments': True,
        'alignmentOrigWordsThreshold': 3,
        'alignmentTargetWordsThreshold': 5,
        'origWordsBetweenThreshold': 1,
        'targetWordsBetweenThreshold': 1,
        'alignmentFrequencyMinThreshold': 8,  # % of the max frequency of alignments for original word
    }

    for folder in (resourceBasePath, baseDataPath, tWordsDataFolder, trainingDataPath):
        file.ensureFolderExists(folder)

    return cfg