Beispiel #1
0
def downloadTestamentAlignments(userUrl, bibleType, newTestament,
                                outputBasePath):
    """Download the alignments of every book in one testament.

    Calls ``file.ensureFolderExists`` on ``outputBasePath``, then walks the
    book ids returned by ``getBookList(newTestament)`` and hands each one to
    ``downloadBookAlignments``, printing a progress line per book.

    Args:
        userUrl: passed through unchanged to ``downloadBookAlignments``.
        bibleType: passed through unchanged to ``downloadBookAlignments``.
        newTestament: selector forwarded to ``getBookList``.
        outputBasePath: base output folder shared by all books.
    """
    file.ensureFolderExists(outputBasePath)
    for currentBook in getBookList(newTestament):
        progress = 'downloadTestamentAlignments downloading book ' + currentBook
        print(progress)
        downloadBookAlignments(userUrl, bibleType, currentBook, outputBasePath)
Beispiel #2
0
def downloadBookAlignments(userUrl, bibleType, bookId, outputBasePath):
    """Download one book's USFM file and convert it to JSON.

    The USFM file is stored at ``<outputBasePath>/<repoName>/<repoName>.usfm``
    and the JSON conversion is written under ``<outputBasePath>/<bookId>``.
    If the USFM file is already present, nothing is downloaded or converted.
    Failures during download or conversion are reported on stdout and do not
    propagate; ``KeyboardInterrupt`` and ``SystemExit`` are NOT suppressed.

    Args:
        userUrl: base URL handed to ``file.getBookUrl``.
        bibleType: bible type handed to ``file.getBookUrl`` and
            ``file.getRepoName``.
        bookId: book identifier; also used as the JSON output folder name.
        outputBasePath: folder under which the per-book folders are created.
    """
    # Example source URL:
    # https://git.door43.org/lrsallee/en_ult_act_book/raw/branch/master/en_ult_act_book.usfm
    url = file.getBookUrl(userUrl, bibleType, bookId)
    repoName = file.getRepoName(bibleType, bookId)
    outputFolder = outputBasePath + '/' + repoName
    file.ensureFolderExists(outputFolder)
    usfmDestPath = outputFolder + '/' + repoName + '.usfm'
    jsonOutput = outputBasePath + '/' + bookId
    if not os.path.isfile(usfmDestPath):
        try:
            file.downloadFile(url, usfmDestPath)
            file.ensureFolderExists(jsonOutput)
            system.convertUsfmToJson(usfmDestPath, jsonOutput)
        except Exception as err:
            # Best-effort: report and carry on with the next book. Catching
            # Exception (not a bare except) lets Ctrl-C and sys.exit() through.
            # NOTE(review): if a failure leaves a .usfm file behind, later runs
            # take the "already exists" branch and never retry the conversion.
            print(f'download of {url} failed: {err}')
    else:
        print('file already exists, skipping ' + usfmDestPath)

    file.removeEmptyFolder(outputFolder)  # don't leave empty folders behind
def getConfig():
    """Return the hard-coded settings dictionary for the ``en_ult`` target.

    Every value is fixed inside this function; the New Testament (Greek)
    branch is selected. Before returning, ``file.ensureFolderExists`` is
    called on the resource base folder, the base data folder, the tWords data
    folder and the training data folder, in that order.

    Relies on ``file`` and ``home`` being defined outside this function.

    Returns:
        dict: settings keyed by name (paths, URLs, versions, language ids
        and processing flags).
    """
    useNewTestament = True
    lang, bibleId = "en", "ult"
    bibleVersion, tWordsVersion = '18', '19'

    # Original-language resources: Greek and Hebrew.
    greekId, greekBibleId, greekVersion = 'el-x-koine', 'ugnt', '0.16'
    hebrewId, hebrewBibleId, hebrewVersion = "hbo", "uhb", '2.1.16'

    cdnUrl = 'https://cdn.door43.org'
    resourcesDir = './resources'
    dataDir = f'./data/{lang}/{bibleId}'
    tWordsDir = f'./data/{lang}/{bibleId}/tWords'
    trainingDir = f'./data/{lang}/{bibleId}/TrainingData'

    greekPath = f'{resourcesDir}/{greekId}/bibles/{greekBibleId}/v{greekVersion}'
    hebrewPath = f'{resourcesDir}/{hebrewId}/bibles/{hebrewBibleId}/v{hebrewVersion}'

    # Pick the active original-language settings for the chosen testament.
    if useNewTestament:
        testament = "NT"
        activeId, activeBibleId = greekId, greekBibleId
        activeVersion, activePath = greekVersion, greekPath
    else:
        testament = "OT"
        activeId, activeBibleId = hebrewId, hebrewBibleId
        activeVersion, activePath = hebrewVersion, hebrewPath

    # Built before any folder is touched, as `home` comes from outside.
    lexiconPath = f'{home}/translationCore/resources/{lang}/lexicons/ugl/v0/content'

    for folder in (resourcesDir, dataDir, tWordsDir, trainingDir):
        file.ensureFolderExists(folder)

    return {
        'newTestament': useNewTestament,
        'testamentStr': testament,
        'targetBibleType': f'{lang}_{bibleId}',
        'resourceBasePath': resourcesDir,
        'baseDataPath': dataDir,
        'origLangPathGreek': greekPath,
        'origLangPathHebrew': hebrewPath,
        'targetLanguagePath': f'{resourcesDir}/{lang}/bibles/{bibleId}/v{bibleVersion}',
        'dbPath': f'{dataDir}/alignments_{testament}.sqlite',
        'targetLang': lang,
        'targetBibleId': bibleId,
        'tWordsTargetPath': f'{resourcesDir}/{lang}/translationHelps/translationWords/v{tWordsVersion}',
        'tWordsTypeList': ['kt', 'names', 'other'],  # categories of tWords
        'tWordsGreekPath': f'{resourcesDir}/{greekId}/translationHelps/translationWords/v{greekVersion}',
        'tWordsDataFolder': tWordsDir,
        'greekLexiconPath': lexiconPath,
        'trainingDataPath': trainingDir,
        'origLangResourceUrl': cdnUrl,
        'targetBibleLangResourceUrl': cdnUrl,
        'targetTWordsLangResourceUrl': cdnUrl,
        'baseLangResourceUrl': cdnUrl,
        'origLangId': activeId,
        'origLangBibleId': activeBibleId,
        'origLangVersionGreek': greekVersion,
        'origLangVersionHebrew': hebrewVersion,
        'origLangVersion': activeVersion,
        'targetLangBibleVersion': bibleVersion,
        'targetLangTWordsVersion': tWordsVersion,
        'tWordsId': "tw",
        'tWordsResourceName': 'bible',
        'projectsUrl': 'https://git.door43.org/lrsallee',
        'origLangPath': activePath,
        'processAllAlignments': True,
        'processTWordsAlignments': True,
    }
def getConfig():
    """Return the hard-coded settings dictionary for the ``ru_rlob`` target.

    Every value is fixed inside this function; the New Testament (Greek)
    branch is selected. Besides paths, URLs, versions and language ids, the
    dictionary carries the alignment threshold settings and the
    ``tWordsUseEnUlt`` flag. Before returning, ``file.ensureFolderExists`` is
    called on the resource base folder, the base data folder, the tWords data
    folder and the training data folder, in that order.

    Relies on ``file`` and ``home`` being defined outside this function.

    Returns:
        dict: settings keyed by name.
    """
    useNewTestament = True
    lang, bibleId = "ru", "rlob"
    bibleVersion, tWordsVersion = '1', '3'

    # Original-language resources: Greek and Hebrew.
    greekId, greekBibleId, greekVersion = 'el-x-koine', 'ugnt', '0.16'
    hebrewId, hebrewBibleId, hebrewVersion = "hbo", "uhb", '2.1.16'

    resourcesDir = './resources'
    dataDir = f'./data/{lang}/{bibleId}'
    tWordsDir = f'./data/{lang}/{bibleId}/tWords'
    trainingDir = f'./data/{lang}/{bibleId}/TrainingData'

    greekPath = f'{resourcesDir}/{greekId}/bibles/{greekBibleId}/v{greekVersion}'
    hebrewPath = f'{resourcesDir}/{hebrewId}/bibles/{hebrewBibleId}/v{hebrewVersion}'

    # Pick the active original-language settings for the chosen testament.
    if useNewTestament:
        testament = "NT"
        activeId, activeBibleId = greekId, greekBibleId
        activeVersion, activePath = greekVersion, greekPath
    else:
        testament = "OT"
        activeId, activeBibleId = hebrewId, hebrewBibleId
        activeVersion, activePath = hebrewVersion, hebrewPath

    # Built before any folder is touched, as `home` comes from outside.
    lexiconPath = f'{home}/translationCore/resources/{lang}/lexicons/ugl/v0/content'

    for folder in (resourcesDir, dataDir, tWordsDir, trainingDir):
        file.ensureFolderExists(folder)

    return {
        'newTestament': useNewTestament,
        'testamentStr': testament,
        'targetBibleType': f'{lang}_{bibleId}',
        'resourceBasePath': resourcesDir,
        'baseDataPath': dataDir,
        'origLangPathGreek': greekPath,
        'origLangPathHebrew': hebrewPath,
        'targetLanguagePath': f'{resourcesDir}/{lang}/bibles/{bibleId}/v{bibleVersion}',
        'dbPath': f'{dataDir}/alignments_{testament}.sqlite',
        'targetLang': lang,
        'targetBibleId': bibleId,
        'tWordsTargetPath': f'{resourcesDir}/{lang}/translationHelps/translationWords/v{tWordsVersion}',
        'tWordsTypeList': ['kt', 'names', 'other'],  # categories of tWords
        'tWordsGreekPath': f'{resourcesDir}/{greekId}/translationHelps/translationWords/v{greekVersion}',
        'tWordsDataFolder': tWordsDir,
        'greekLexiconPath': lexiconPath,
        'trainingDataPath': trainingDir,
        'origLangResourceUrl': 'https://git.door43.org/unfoldingWord/el-x-koine_ugnt/archive/master.zip',
        'targetBibleLangResourceUrl': 'https://git.door43.org/ru_gl/ru_rlob/archive/master.zip',
        'targetTWordsLangResourceUrl': 'https://git.door43.org/ru_gl/ru_tw/archive/master.zip',
        'baseLangResourceUrl': 'https://cdn.door43.org',
        'origLangId': activeId,
        'origLangBibleId': activeBibleId,
        'origLangVersionGreek': greekVersion,
        'origLangVersionHebrew': hebrewVersion,
        'origLangVersion': activeVersion,
        'targetLangBibleVersion': bibleVersion,
        'targetLangTWordsVersion': tWordsVersion,
        'tWordsId': "tw",
        'tWordsResourceName': 'bible',
        'projectsUrl': 'https://git.door43.org/lrsallee',
        'origLangPath': activePath,
        'tWordsUseEnUlt': True,
        'processAllAlignments': True,
        'processTWordsAlignments': True,
        'alignmentOrigWordsThreshold': 3,
        'alignmentTargetWordsThreshold': 5,
        'origWordsBetweenThreshold': 1,
        'targetWordsBetweenThreshold': 1,
        'alignmentFrequencyMinThreshold': 8,  # % of the max frequency of alignments for original word
    }