def test_obs_ts_pipeline(self, mock_s3_upload, mock_s3_get_objects, mock_download_repo, mock_post):
    """
    Fire the webhook with an en-obs tS payload and verify the repo download
    mock was asked for the expected commit URL.

    :param MagicMock mock_s3_upload:
    :param MagicMock mock_s3_get_objects:
    :param MagicMock mock_download_repo:
    :param MagicMock mock_post:
    :return:
    """
    # Route every external call through the local test doubles.
    for mock_obj, handler in ((mock_download_repo, self.mock_download_repo),
                              (mock_s3_upload, self.mock_s3_upload_file),
                              (mock_s3_get_objects, self.mock_s3_get_objects),
                              (mock_post, self.mock_requests_post)):
        mock_obj.side_effect = handler

    # Build the webhook event from the canned fixture files.
    event = {
        'vars': load_json_object(os.path.join(self.resources_dir, 'en-obs-vars.json')),
        'data': load_json_object(os.path.join(self.resources_dir, 'en-obs-payload.json')),
    }
    # Minimal Lambda-style context with a short random request id.
    context = TestPipeline.JsonObject({'aws_request_id': str(uuid.uuid4())[-10:]})

    # fire the web hook
    webhook.handle(event, context)

    # check that the mocks are working
    self.assertIn('https://git.door43.org/Door43/en-obs/commit/e323f37de1ad2c063a3659c58494edbb2641ce54',
                  mock_download_repo.call_args[0])
def get_manifest_from_dir(self) -> Dict[str, Any]:
    """
    Look for a manifest file inside self.path and return its parsed contents.

    Candidates are tried in order: manifest.yaml, manifest.json, package.json,
    project.json, meta.json.  The first file that parses to a truthy object
    wins and self.loadeded_manifest_file (sic — attribute name kept for
    compatibility with callers) is set True.  A badly-formed candidate is
    logged, recorded in self.error_messages, and skipped.  When nothing
    usable is found, falls back to get_manifest_from_repo_name().
    """
    AppSettings.logger.info(f"get_manifest_from_dir() with {self.path} …")
    self.loadeded_manifest_file = False
    if not self.path or not os.path.isdir(self.path):
        return get_manifest_from_repo_name(self.repo_name)

    # (filename, loader, exceptions the loader raises on malformed content).
    # This replaces five copy-pasted try/except blocks in the original.
    candidates = (
        ('manifest.yaml', load_yaml_object, (ParserError, ScannerError)),
        ('manifest.json', load_json_object, (JSONDecodeError,)),
        ('package.json', load_json_object, (JSONDecodeError,)),
        ('project.json', load_json_object, (JSONDecodeError,)),
        ('meta.json', load_json_object, (JSONDecodeError,)),
    )
    for manifest_filename, loader, parse_errors in candidates:
        manifest = None
        try:
            manifest = loader(os.path.join(self.path, manifest_filename))
        except parse_errors as e:
            err_msg = f"Badly formed '{manifest_filename}' in {self.repo_name}: {e}"
            AppSettings.logger.error(err_msg)
            self.error_messages.add(err_msg)
        if manifest:
            self.loadeded_manifest_file = True
            return manifest

    return get_manifest_from_repo_name(self.repo_name)
def test_obs_catalog(self):
    """
    Run the full catalog update against local fixture files and verify that
    tW/tN/tQ links are kept for English OBS but blanked for non-English
    (see door43.org/issues/376).

    FIX: assertEquals/assertNotEquals are deprecated unittest aliases;
    replaced with assertEqual/assertNotEqual.
    """
    obs_v1_local = '{0}/obs/txt/1'.format(self.temp_dir)
    obs_v1_url = 'file://{0}/obs-catalog.json'.format(obs_v1_local)
    lang_url = 'file://{0}/td/langnames.json'.format(self.temp_dir)
    bible_stat = self.temp_dir + '/{0}/txt/1/{1}-{2}/status.json'
    uw_v2_local = '{0}/uw/txt/2/catalog.json'.format(self.temp_dir)
    ts_obs_langs_url = 'file://{0}/ts/txt/2/obs/languages.json'.format(self.temp_dir)

    # Point the updater at the temp fixture tree instead of the live site.
    CatalogUpdater.obs_v1_local = obs_v1_local
    CatalogUpdater.obs_v2_local = '{0}/ts/txt/2'.format(self.temp_dir)
    CatalogUpdater.uw_v2_local = uw_v2_local
    CatalogUpdater.ts_obs_langs_url = ts_obs_langs_url
    updater = CatalogUpdater(None, None, None)

    # OBS
    obs_v1 = get_url(obs_v1_url, True)
    obs_v1_catalog = json.loads(obs_v1)
    CatalogUpdater.obs(deepcopy(obs_v1_catalog))

    # Bible: collect the status file of every known bible slug.
    lang_names = json.loads(get_url(lang_url, True))
    bible_status = {}
    bible_bks = []
    langs = set([x[2] for x in updater.bible_slugs])
    for domain, slug, lang in updater.bible_slugs:
        file_name = bible_stat.format(domain, slug, lang)
        if not os.path.isfile(file_name):
            continue
        bible_status[(domain, slug, lang)] = load_json_object(file_name)
        bible_bks += bible_status[(domain, slug, lang)]['books_published'].keys()
    updater.bible(lang_names, bible_status, bible_bks, langs)

    # Global
    CatalogUpdater.ts_cat()
    updater.uw_cat(obs_v1_catalog, bible_status)

    # check door43.org/issues/376: remove tW, tN and tQ links from non-English OBS
    en_obs = load_json_object('{0}/ts/txt/2/obs/en/resources.json'.format(self.temp_dir))[0]
    self.assertNotEqual(en_obs['checking_questions'], '')
    self.assertNotEqual(en_obs['notes'], '')
    self.assertNotEqual(en_obs['terms'], '')
    self.assertNotEqual(en_obs['tw_cat'], '')
    fr_obs = load_json_object('{0}/ts/txt/2/obs/fr/resources.json'.format(self.temp_dir))[0]
    self.assertEqual(fr_obs['checking_questions'], '')
    self.assertEqual(fr_obs['notes'], '')
    self.assertEqual(fr_obs['terms'], '')
    self.assertEqual(fr_obs['tw_cat'], '')
def update_catalog(domain=None, slug=None, lang=None):
    """
    Rebuild the OBS, Bible, tS and uW catalogs from their source files.

    NOTE(review): the Bible loop below rebinds the domain/slug/lang
    parameters — this mirrors the original behavior.
    """
    global bible_stat, lang_url

    updater = CatalogUpdater(domain, slug, lang)

    # OBS catalog.
    obs_v1 = get_url(CatalogUpdater.obs_v1_url, True)
    obs_v1_catalog = json.loads(obs_v1)
    CatalogUpdater.obs(deepcopy(obs_v1_catalog))

    # Bible: gather the status file of every known bible slug.
    lang_names = json.loads(get_url(lang_url, True))
    bible_status = {}
    bible_bks = []
    langs = {entry[2] for entry in updater.bible_slugs}
    for domain, slug, lang in updater.bible_slugs:
        status_file = bible_stat.format(domain, slug, lang)
        if not os.path.isfile(status_file):
            continue
        status = load_json_object(status_file)
        bible_status[(domain, slug, lang)] = status
        bible_bks += status['books_published'].keys()
    updater.bible(lang_names, bible_status, bible_bks, langs)

    # Global catalogs.
    CatalogUpdater.ts_cat()
    updater.uw_cat(obs_v1_catalog, bible_status)
def ts_cat():
    """
    Build the global translationStudio catalog from each project's
    languages.json and write it to <obs_v2_local>/catalog.json.
    """
    ts_categories = []
    for x in CatalogUpdater.bible_dirs:
        CatalogUpdater.project_dirs.append(x)
    for p in CatalogUpdater.project_dirs:
        file_name = '{0}/{1}/languages.json'.format(CatalogUpdater.obs_v2_local, p)
        proj_cat = load_json_object(file_name)
        if not proj_cat:
            continue
        proj_url = '{0}/{1}/languages.json'.format(CatalogUpdater.obs_v2_api, p)

        # Most recent modification date across all languages of this project.
        dates = set([x['language']['date_modified'] for x in proj_cat])
        dates_list = list(dates)
        dates_list.sort(reverse=True)

        sort = '01'
        if p in CatalogUpdater.bible_dirs:
            sort = [x['project']['sort'] for x in proj_cat if 'project' in x][0]

        # FIX: the sort lookup above allows for entries without a 'project'
        # key, but the original indexed proj_cat[0]['project'] unconditionally
        # and could raise KeyError; guard with .get() instead.
        meta = []
        if proj_cat[0].get('project', {}).get('meta'):
            if 'Bible: OT' in proj_cat[0]['project']['meta']:
                meta += ['bible-ot']
            if 'Bible: NT' in proj_cat[0]['project']['meta']:
                meta += ['bible-nt']

        ts_categories.append({'slug': p,
                              'date_modified': dates_list[0],
                              'lang_catalog': '{0}?date_modified={1}'.format(proj_url, dates_list[0]),
                              'sort': sort,
                              'meta': meta
                              })

    # Write global catalog
    outfile = '{0}/catalog.json'.format(CatalogUpdater.obs_v2_local)
    write_file(outfile, ts_categories)
def __init__(self, file_name=None):
    """
    Class constructor. Optionally accepts the name of a file to deserialize.

    :param unicode file_name: The name of a file to deserialize into a BibleMetaData object
    """
    if file_name:
        # Deserialize from disk; guard clause replaces the nested if/else.
        if not os.path.isfile(file_name):
            raise IOError('The file {0} was not found.'.format(file_name))
        self.__dict__ = load_json_object(file_name)
        return

    # Fresh, empty metadata.
    self.slug = ''           # like "{0}-{1}".format(domain, lang) = "ulb-lpx"
    self.name = ''           # like "Unlocked Literal Bible - Lopit"
    self.lang = ''           # like "lpx"
    self.date_modified = ''  # like "20160417"
    self.status = {
        "checking_entity": '',  # like "Translation Team"
        "checking_level": '1',
        "comments": '',
        "contributors": '',
        "publish_date": '',     # like "20160417"
        "source_text": 'en',
        "source_text_version": '2',
        "version": '2.1'        # this is source_text_version + '.1' = 2.1 or 2.1.1
    }
    self.books_published = {}
def __init__(self, file_name=None):
    """
    Class constructor. Optionally accepts the name of a file to deserialize.

    :param str file_name: The name of a file to deserialize into a OBS object
    """
    if file_name:
        # Deserialize from disk; guard clause replaces the nested if/else.
        if not os.path.isfile(file_name):
            raise IOError('The file {0} was not found.'.format(file_name))
        self.__dict__ = load_json_object(file_name)
        return

    # Default English UI strings.
    self.app_words = dict(
        cancel='Cancel',
        chapters='Chapters',
        languages='Languages',
        next_chapter='Next Chapter',
        ok='OK',
        remove_locally='Remove Locally',
        remove_this_string='Remove this language from offline storage. You will need an '
                           'internet connection to view it in the future.',
        save_locally='Save Locally',
        save_this_string='Save this language locally for offline use.',
        select_a_language='Select a Language')
    self.chapters = []
    self.date_modified = datetime.today().strftime('%Y%m%d')
    self.direction = 'ltr'
    self.language = ''
def __init__(self, file_name=None):
    """
    Class constructor. Optionally accepts the name of a file to deserialize.

    :param str file_name: The name of a file to deserialize into a BibleMetaData object
    """
    if file_name:
        # Deserialize from disk; guard clause replaces the nested if/else.
        if not os.path.isfile(file_name):
            raise IOError('The file {0} was not found.'.format(file_name))
        self.__dict__ = load_json_object(file_name)
        # Older files predate the versification field; default it.
        if 'versification' not in self.__dict__:
            self.versification = 'ufw'
        return

    # Fresh, empty metadata.
    self.lang = ''
    self.name = ''
    self.slug = ''
    self.checking_entity = ''
    self.checking_level = '1'
    self.comments = ''
    self.contributors = ''
    self.publish_date = datetime.today().strftime('%Y-%m-%d')
    self.source_text = ''
    self.source_text_version = ''
    self.version = ''
    self.versification = 'ufw'
def mock_s3_tn_project(self, part):
    """
    Unzip the converted en_tn fixture and seed the mock S3 handlers with the
    files a real conversion run would have uploaded for the given part.
    """
    zip_file = os.path.join(self.resources_dir, 'converted_projects', 'en_tn_converted.zip')
    out_dir = os.path.join(self.temp_dir, 'en_tn_converted')
    unzip(zip_file, out_dir)
    src_dir = os.path.join(out_dir, 'en_tn_converted')

    self.project_files = [f for f in os.listdir(src_dir)
                          if os.path.isfile(os.path.join(src_dir, f))]
    self.project_key = 'u/door43/en_tn/12345678'

    # Stamp the part number into the build log before uploading it.
    build_log_path = os.path.join(src_dir, 'build_log.json')
    build_log = file_utils.load_json_object(build_log_path)
    build_log['part'] = part
    file_utils.write_file(build_log_path, build_log)

    part_key = '{0}/{1}'.format(self.project_key, part)
    AppSettings.cdn_s3_handler().upload_file(build_log_path, part_key + '/build_log.json')
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, 'index.json'), part_key + '/index.json')
    # The "finished" marker is just a copy of the build log.
    AppSettings.cdn_s3_handler().upload_file(build_log_path, part_key + '/finished')
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, '01-GEN.html'), part_key + '/01-GEN.html')
    AppSettings.cdn_s3_handler().upload_file(os.path.join(src_dir, 'project.json'), 'u/door43/en_tq/project.json')
    AppSettings.door43_s3_handler().upload_file(
        os.path.join(self.resources_dir, 'templates', 'project-page.html'),
        'templates/project-page.html')
def __init__(self, file_name=None, repo_name=None):
    """
    Class constructor. Optionally accepts the name of a file to deserialize.

    :param str file_name: The name of a file to deserialize into a Manifest object
    """
    # Defaults.
    self.package_version = Manifest.PACKAGE_VERSION
    self.modified_at = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    self.slug = ""
    self.name = ""
    self.icon = "https://cdn.door43.org/images/default_icon.jpg"
    self.formats = {}
    self.language = {}
    self.projects = {}
    self.status = {}

    # Deserialize over the defaults when a file was given.
    if file_name:
        if not os.path.isfile(file_name):
            raise IOError('The manifest.json file was not found')
        try:
            manifest_json = load_json_object(file_name)
        except Exception as e:
            raise Exception('Structure error of the manifest.json file: {0}'.format(e))
        self.__dict__.update(manifest_json)

    if repo_name:
        self.update_from_repo_name(repo_name)
def __init__(self, file_name=None):
    """
    Class constructor. Optionally accepts the name of a file to deserialize.

    :param str file_name: The name of a file to deserialize into a OBS object
    """
    if file_name:
        # Deserialize from disk; guard clause replaces the nested if/else.
        if not os.path.isfile(file_name):
            raise IOError('The file {0} was not found.'.format(file_name))
        self.__dict__ = load_json_object(file_name)
        return

    # Default English UI strings.
    self.app_words = dict(
        cancel='Cancel',
        chapters='Chapters',
        languages='Languages',
        next_chapter='Next Chapter',
        ok='OK',
        remove_locally='Remove Locally',
        remove_this_string='Remove this language from offline storage. You will need an '
                           'internet connection to view it in the future.',
        save_locally='Save Locally',
        save_this_string='Save this language locally for offline use.',
        select_a_language='Select a Language')
    self.chapters = []
    self.date_modified = datetime.today().strftime('%Y%m%d')
    self.direction = 'ltr'
    self.language = ''
def populate_tn_groups_data(self):
    """
    Load every tN group JSON file for this project, attach an rc:// link to
    each contextId, and index the contextIds by chapter and verse into
    self.tn_groups_data (OrderedDict of chapter -> verse -> [contextId]).

    Exits the process when the tN resource or a version directory is missing.
    """
    tn_resource_path = os.path.join(self.working_dir, 'resources', self.lang_code,
                                    'translationHelps', 'translationNotes')
    # FIX: os.path.join() always returns a non-empty string, so the original
    # `if not tn_resource_path:` could never fire; check the directory exists.
    if not os.path.isdir(tn_resource_path):
        self.logger.error(f'{tn_resource_path} not found!')
        exit(1)
    tn_version_path = get_latest_version_path(tn_resource_path)
    if not tn_version_path:
        self.logger.error(f'Version not found in {tn_resource_path}!')
        exit(1)

    groups = get_child_directories(tn_version_path)
    groups_data = OrderedDict()
    for group in groups:
        files_path = os.path.join(tn_version_path, f'{group}/groups/{self.project_id}', '*.json')
        files = glob(files_path)
        for file in files:
            base = os.path.splitext(os.path.basename(file))[0]
            occurrences = load_json_object(file)
            for occurrence in occurrences:
                context_id = occurrence['contextId']
                chapter = str(context_id['reference']['chapter'])
                verse = str(context_id['reference']['verse'])
                tn_rc_link = f'rc://{self.lang_code}/tn/help/{group}/{base}/{self.project_id}/{self.pad(chapter)}/{verse.zfill(3)}'
                context_id['rc'] = tn_rc_link
                if chapter not in groups_data:
                    groups_data[chapter] = OrderedDict()
                if verse not in groups_data[chapter]:
                    groups_data[chapter][verse] = []
                groups_data[chapter][verse].append(context_id)
    self.tn_groups_data = groups_data
def __init__(self, file_name=None):
    """
    Class constructor. Optionally accepts the name of a file to deserialize.

    :param unicode file_name: The name of a file to deserialize into a BibleMetaData object
    """
    if file_name:
        # Deserialize from disk; guard clause replaces the nested if/else.
        if not os.path.isfile(file_name):
            raise IOError('The file {0} was not found.'.format(file_name))
        self.__dict__ = load_json_object(file_name)
        return

    # Fresh, empty metadata.
    self.slug = ''           # like "{0}-{1}".format(domain, lang) = "ulb-lpx"
    self.name = ''           # like "Unlocked Literal Bible - Lopit"
    self.lang = ''           # like "lpx"
    self.date_modified = ''  # like "20160417"
    self.status = {
        "checking_entity": '',  # like "Translation Team"
        "checking_level": '1',
        "comments": '',
        "contributors": '',
        "publish_date": '',     # like "20160417"
        "source_text": 'en',
        "source_text_version": '2',
        "version": '2.1'        # this is source_text_version + '.1' = 2.1 or 2.1.1
    }
    self.books_published = {}
def test_ceb_psa_text_ulb_L3(self):
    """ Populates the ResourceContainer object and verifies the output."""
    # test with the Cebuano Psalms ULB (pre-RC tS format)
    zip_file = os.path.join(self.resources_dir, 'ceb_psa_text_ulb_L3.zip')
    self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
    unzip(zip_file, self.out_dir)
    repo_dir = os.path.join(self.out_dir, 'ceb_psa_text_ulb_l3')
    rc = RC(directory=repo_dir)
    rc.as_dict()
    # FIX: renamed the local from `json` so it no longer shadows the json module.
    manifest_json = load_json_object(os.path.join(repo_dir, 'manifest.json'))
    self.assertEqual(rc.resource.identifier, manifest_json['resource']['id'])
    self.assertEqual(rc.resource.type, 'book')
    self.assertEqual(rc.resource.format, 'text/{0}'.format(manifest_json['format']))
    self.assertEqual(rc.resource.file_ext, manifest_json['format'])
    self.assertEqual(rc.resource.conformsto, 'pre-rc')
    self.assertEqual(rc.resource.modified, datetime.utcnow().strftime('%Y-%m-%d'))
    chapters = rc.projects[0].chapters()
    # Numeric chapter directories must be contiguous starting at 1.
    idx = 1
    for chapter in chapters:
        if chapter.isnumeric():
            self.assertEqual(int(chapter), idx)
            idx += 1
    self.assertEqual(len(chapters), 151)
    chunks = rc.projects[0].chunks('01')
    self.assertEqual(len(chunks), 5)
def __init__(self, *args, **kwargs) -> None:
    """Set the tW CSS class and pull titles/chapters from the source index.json."""
    self.templater_CSS_class = 'tw'
    super(TwTemplater, self).__init__(*args, **kwargs)
    index = file_utils.load_json_object(os.path.join(self.source_dir, 'index.json'))
    if index:
        self.titles = index['titles']
        self.chapters = index['chapters']
def get_usfm_data():
    """Return the ufw versification data, fetching and caching it on first use."""
    if not Bible.usfm_data:
        # TODO: change these to point to the API when it is available
        api_root = 'https://raw.githubusercontent.com/unfoldingWord-dev/uw-api/develop/static'
        Bible.usfm_data = load_json_object(api_root + '/versification/ufw/books-en.json')
    return Bible.usfm_data
def export_to_api(lang, status, today, cur_json):
    """
    Publish an OBS language to unfoldingWord and update the uw catalog,
    provided the status has a valid checking_level and a publish_date.

    Exits the process on Github login failure or an invalid/missing status.
    """
    global unfoldingWord_dir, lang_cat, github_org, pages
    print('Getting Github credentials...', end=' ')
    try:
        github_org = None
        if os.path.isfile('/root/.github_pass'):
            # FIX: read the password file with a context manager so the
            # handle is closed deterministically (the original leaked it).
            # noinspection PyTypeChecker
            with open('/root/.github_pass', 'r') as pass_file:
                pw = pass_file.read().strip()
            g_user = githubLogin('dsm-git', pw)
            github_org = getGithubOrg('unfoldingword', g_user)
        else:
            print('none found...', end=' ')
    except GithubException as e:
        print_error('Problem logging into Github: {0}'.format(e))
        sys.exit(1)
    print('finished.')

    print('Loading the uw catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    uw_cat_langs = [x['language'] for x in uw_catalog]
    print('finished')

    unfolding_word_lang_dir = os.path.join(unfoldingWord_dir, lang)
    if 'checking_level' in status and 'publish_date' in status:
        if status.checking_level in ['1', '2', '3']:
            front_json = OBS.get_front_matter(pages, lang, today)
            back_json = OBS.get_back_matter(pages, lang, today)
            print('Exporting {0}...'.format(lang), end=' ')
            export_unfolding_word(status, unfolding_word_lang_dir, cur_json,
                                  lang, github_org, front_json, back_json)
            # Replace any existing catalog entry for this language.
            if lang in uw_cat_langs:
                uw_catalog.pop(uw_cat_langs.index(lang))
                uw_cat_langs.pop(uw_cat_langs.index(lang))
            uw_catalog.append(lang_cat)
            uw_cat_json = json.dumps(uw_catalog, sort_keys=True, cls=OBSEncoder)
            write_file(uw_cat_path, uw_cat_json)
            # update uw_admin status page
            ObsPublishedLangs.update_page(ObsPublishedLangs.cat_url, ObsPublishedLangs.uw_stat_page)
            print('finished.')
        else:
            print_error('The `checking_level` is invalid.')
            sys.exit(1)
    else:
        print_error('The status is missing `checking_level` or `publish_date`.')
        sys.exit(1)
def export_to_api(lang, status, today, cur_json):
    """
    Publish an OBS language to unfoldingWord and update the uw catalog,
    provided the status has a valid checking_level and a publish_date.

    Exits the process on Github login failure or an invalid/missing status.
    """
    global unfoldingWord_dir, lang_cat, github_org, pages
    print('Getting Github credentials...', end=' ')
    try:
        github_org = None
        if os.path.isfile('/root/.github_pass'):
            # FIX: read the password file with a context manager so the
            # handle is closed deterministically (the original leaked it).
            # noinspection PyTypeChecker
            with open('/root/.github_pass', 'r') as pass_file:
                pw = pass_file.read().strip()
            g_user = githubLogin('dsm-git', pw)
            github_org = getGithubOrg('unfoldingword', g_user)
        else:
            print('none found...', end=' ')
    except GithubException as e:
        print_error('Problem logging into Github: {0}'.format(e))
        sys.exit(1)
    print('finished.')

    print('Loading the uw catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    uw_cat_langs = [x['language'] for x in uw_catalog]
    print('finished')

    unfolding_word_lang_dir = os.path.join(unfoldingWord_dir, lang)
    if 'checking_level' in status and 'publish_date' in status:
        if status.checking_level in ['1', '2', '3']:
            front_json = OBS.get_front_matter(pages, lang, today)
            back_json = OBS.get_back_matter(pages, lang, today)
            print('Exporting {0}...'.format(lang), end=' ')
            export_unfolding_word(status, unfolding_word_lang_dir, cur_json,
                                  lang, github_org, front_json, back_json)
            # Replace any existing catalog entry for this language.
            if lang in uw_cat_langs:
                uw_catalog.pop(uw_cat_langs.index(lang))
                uw_cat_langs.pop(uw_cat_langs.index(lang))
            uw_catalog.append(lang_cat)
            uw_cat_json = json.dumps(uw_catalog, sort_keys=True, cls=OBSEncoder)
            write_file(uw_cat_path, uw_cat_json)
            # update uw_admin status page
            ObsPublishedLangs.update_page(ObsPublishedLangs.cat_url, ObsPublishedLangs.uw_stat_page)
            print('finished.')
        else:
            print_error('The `checking_level` is invalid.')
            sys.exit(1)
    else:
        print_error('The status is missing `checking_level` or `publish_date`.')
        sys.exit(1)
def obs(obs_v1_cat):
    """
    Write a per-language OBS v2 resources.json for every v1 catalog entry
    that has front matter on disk, then write the global OBS languages.json.
    The v1 entries are re-shaped in place into v2 resource records.
    """
    langs_cat = []
    for entry in obs_v1_cat:
        front_file = '{0}/{1}/obs-{1}-front-matter.json'.format(CatalogUpdater.obs_v1_local, entry['language'])
        if not os.path.isfile(front_file):
            continue
        front_json = load_json_object(front_file)

        lang_entry = {
            'language': {
                'slug': entry['language'],
                'name': entry['string'],
                'direction': entry['direction'],
                'date_modified': entry['date_modified'],
            },
            'project': {
                'name': front_json['name'],
                'desc': front_json['tagline'],
                'meta': [],
            },
        }

        # Re-shape the v1 entry in place into a v2 resource record.
        lang = entry['language']
        del entry['language']
        del entry['string']
        del entry['direction']
        entry['slug'] = 'obs'
        entry['name'] = 'Open Bible Stories'
        entry['source'] = CatalogUpdater.add_date('{0}/{1}/obs-{1}.json'.format(CatalogUpdater.obs_v1_api, lang))
        if lang == 'en':
            entry['terms'] = CatalogUpdater.add_date('{0}/{1}/kt-{1}.json'.format(CatalogUpdater.obs_v1_api, lang))
            entry['notes'] = CatalogUpdater.add_date('{0}/{1}/tN-{1}.json'.format(CatalogUpdater.obs_v1_api, lang))
            entry['tw_cat'] = CatalogUpdater.add_date('{0}/{1}/tw_cat-{1}.json'.format(CatalogUpdater.obs_v1_api, lang))
            entry['checking_questions'] = CatalogUpdater.add_date('{0}/{1}/CQ-{1}.json'.format(
                CatalogUpdater.obs_v1_api, lang))
        else:
            # Non-English OBS carries no helps links.
            entry['terms'] = ''
            entry['notes'] = ''
            entry['tw_cat'] = ''
            entry['checking_questions'] = ''
        entry['date_modified'] = CatalogUpdater.most_recent(entry)

        write_file('{0}/obs/{1}/resources.json'.format(CatalogUpdater.obs_v2_local, lang), [entry])
        lang_entry['res_catalog'] = '{0}/obs/{1}/resources.json?date_modified={2}'.format(
            CatalogUpdater.obs_v2_api, lang, entry['date_modified'])
        langs_cat.append(lang_entry)

    # Write global OBS catalog
    write_file('{0}/obs/languages.json'.format(CatalogUpdater.obs_v2_local), langs_cat)
def __init__(self, content_dir=None):
    """
    Class constructor. Takes a path to a directory

    :param object content_dir: Path to the directory of OBS manifest file
    :raises IOError: when manifest.json is not present in content_dir
    """
    self.content_dir = content_dir
    self.manifest_file = os.path.join(self.content_dir, 'manifest.json')
    if not os.path.isfile(self.manifest_file):
        raise IOError('The file {0} was not found.'.format(self.manifest_file))
    # NOTE: this replaces the whole instance dict with the file contents.
    self.__dict__ = load_json_object(self.manifest_file)
def populate_verse_usfm(self, bible_id, lang_code=None):
    """
    Build per-chapter, per-verse USFM and rendered HTML for the given bible
    and store the result in self.verse_usfm[bible_id].

    :param str bible_id: bible resource id (e.g. 'ult')
    :param str lang_code: defaults to self.lang_code
    """
    if not lang_code:
        lang_code = self.lang_code
    bible_path = os.path.join(self.working_dir, 'resources', lang_code, 'bibles', bible_id)
    # FIX: os.path.join() always returns a non-empty string, so the original
    # `if not bible_path:` could never fire; check the directory exists.
    if not os.path.isdir(bible_path):
        self.logger.error(f'{bible_path} not found!')
        exit(1)
    bible_version_path = get_latest_version_path(bible_path)
    if not bible_version_path:
        self.logger.error(f'No versions found in {bible_path}!')
        exit(1)

    book_data = OrderedDict()
    book_file = os.path.join(self.resources[bible_id].repo_dir,
                             f'{self.book_number}-{self.project_id.upper()}.usfm')
    book_usfm = read_file(book_file)
    unaligned_usfm = unalign_usfm(book_usfm)
    chapters = unaligned_usfm.split(r'\c ')
    for chapter_usfm in chapters[1:]:
        chapter = re.findall(r'(\d+)', chapter_usfm)[0]
        book_data[chapter] = OrderedDict()
        chapter_usfm = r'\c ' + chapter_usfm
        chapter_vo_file = os.path.join(bible_version_path, self.project_id, f'{chapter}.json')
        chapter_verse_objects = load_json_object(chapter_vo_file)
        verses = chapter_usfm.split(r'\v ')
        for verse_usfm in verses[1:]:
            # A verse marker may be a single number or a span like "1-3".
            from_verse, to_verse = re.findall(r'^(\d+)(?:-(\d+))*', verse_usfm)[0]
            if not to_verse:
                to_verse = from_verse
            for verse in range(int(from_verse), int(to_verse) + 1):
                verse = str(verse)
                from_to_verse = f'{from_verse}-{to_verse}'
                if from_to_verse in chapter_verse_objects:
                    usfm = rf'\v {from_to_verse} {self.get_text_from_verse_objects(chapter_verse_objects[from_to_verse])}'
                elif verse in chapter_verse_objects:
                    usfm = rf'\v {verse} {self.get_text_from_verse_objects(chapter_verse_objects[verse]["verseObjects"])}'
                else:
                    # No verse objects available; fall back to the raw USFM text.
                    usfm = rf'\v {verse_usfm}'
                html = self.get_verse_html(usfm, bible_id, chapter, verse)
                book_data[chapter][verse] = {
                    'usfm': usfm.strip(),
                    'html': html.strip()
                }
    self.verse_usfm[bible_id] = book_data
def tw_cat(self):
    """
    Lazily build and return the tW catalog index.

    On first call, loads tw_cat.json and, for each chapter/frame item,
    resolves the term to a tW markdown file under bible/{kt,names,other}/.
    Resolved terms are appended as rc:// links under
    self._tw_cat[chapter_id][frame_id]; unresolved or renamed terms are
    reported via self.add_error_message().
    """
    if not self._tw_cat:
        # Legacy tw_cat.json term ids that were renamed in the tW resource.
        mapping = {
            'idol': 'falsegod',
            'witness': 'testimony',
            'newcovenant': 'covenant',
            'taxcollector': 'tax',
            'believer': 'believe'
        }
        tw_cat_file = os.path.join(self.converters_dir, 'tw_cat.json')
        self._tw_cat = load_json_object(tw_cat_file)
        for chapter in self._tw_cat['chapters']:
            # Index frames by chapter id alongside the original 'chapters' list.
            self._tw_cat[chapter['id']] = {}
            for frame in chapter['frames']:
                self._tw_cat[chapter['id']][frame['id']] = []
                for item in frame['items']:
                    term = item['id']
                    # First try the term id as-is in each tW category.
                    category = None
                    for c in ['kt', 'names', 'other']:
                        if os.path.exists(
                                os.path.join(self.resources['tw'].repo_dir,
                                             'bible', c, f'{term}.md')):
                            category = c
                            break
                    # Fall back to the renamed term from the legacy mapping.
                    if not category and term in mapping:
                        category = None
                        for c in ['kt', 'names', 'other']:
                            if os.path.exists(
                                    os.path.join(
                                        self.resources['tw'].repo_dir,
                                        'bible', c, f'{mapping[term]}.md')):
                                category = c
                                term = mapping[term]
                                break
                    if category:
                        self._tw_cat[chapter['id']][frame['id']].append(
                            f'rc://{self.lang_code}/tw/dict/bible/{category}/{term}'
                        )
                    # Report terms that were not found or were renamed.
                    if not category or term != item['id']:
                        fix = None
                        if term != item['id']:
                            fix = f'change to: {term}'
                        source_rc_link = f'rc://{self.lang_code}/tw_cat/{chapter["id"]}/{frame["id"]}'
                        source_rc = self.create_rc(source_rc_link)
                        self.add_error_message(source_rc, item['id'], fix)
    return self._tw_cat
def get_verse_objects(self, bible_id, chapter, verse):
    """
    Return the verseObjects list for the given bible/chapter/verse, or []
    when the verse is absent from the chapter JSON.

    Exits the process when the bible directory or a version of it is missing.
    """
    bible_path = os.path.join(self.resources_dir, self.lang_code, 'bibles', bible_id)
    # FIX: os.path.join() always returns a non-empty string, so the original
    # `if not bible_path:` could never fire; check the directory exists.
    if not os.path.isdir(bible_path):
        self.logger.error(f'{bible_path} not found!')
        exit(1)
    bible_version_path = get_latest_version_path(bible_path)
    if not bible_version_path:
        self.logger.error(f'No versions found in {bible_path}!')
        exit(1)
    chapter_json_path = f'{bible_version_path}/{self.project_id}/{chapter}.json'
    data = load_json_object(chapter_json_path)
    if verse in data:
        return data[verse]['verseObjects']
    else:
        return []
def __init__(self, file_name=None, meta=None, repo_name=None, files_path=None):
    """
    Class constructor. Optionally accepts the name of a file to deserialize.

    :param str file_name: The name of a file to deserialize into a Manifest object
    """
    # Defaults.
    self.package_version = Manifest.LATEST_VERSION
    self.format = ""
    self.generator = {"name": "", "build": ""}
    self.target_language = {"id": "", "name": "", "direction": "ltr"}
    self.project = {"id": "", "name": ""}
    self.type = {"id": "text", "name": "Text"}
    self.resource = {"id": "", "name": ""}
    self.source_translations = []
    self.parent_draft = {}
    self.translators = []
    self.finished_chunks = []

    # Deserialize over the defaults when a file was given.
    if file_name:
        if not os.path.isfile(file_name):
            raise IOError('The file {0} was not found.'.format(file_name))
        raw_manifest = load_json_object(file_name)
        self.__dict__.update(Manifest.standardize_manifest_json(raw_manifest))

    if meta:
        self.update_from_meta(meta)
    if files_path:
        self.update_from_files(files_path)
    if repo_name:
        self.update_from_repo_name(repo_name)

    # Infer a Bible resource when none was declared but the content is USFM
    # or the project id is a known book name.
    if not self.resource['id'] and (
            self.format == 'usfm'
            or (self.project['id'] and self.project['id'].lower() in BOOK_NAMES)):
        self.resource['id'] = 'bible'
        self.resource['name'] = 'Bible'
def populate_tw_words_data(self):
    """
    Load every tW group JSON file for this project, attach an rc:// link and
    ULT/UST alignments to each record, and index the records by chapter and
    verse into self.tw_words_data.

    Exits the process when the tW resource or a version directory is missing.
    """
    tw_path = os.path.join(self.working_dir, 'resources', self.ol_lang_code,
                           'translationHelps/translationWords')
    # FIX: os.path.join() always returns a non-empty string, so the original
    # `if not tw_path:` could never fire; check the directory exists.
    if not os.path.isdir(tw_path):
        self.logger.error(f'{tw_path} not found!')
        exit(1)
    tw_version_path = get_latest_version_path(tw_path)
    if not tw_version_path:
        self.logger.error(f'No versions found in {tw_path}!')
        exit(1)

    groups = get_child_directories(tw_version_path)
    words_data = OrderedDict()
    for group in groups:
        files_path = os.path.join(tw_version_path, f'{group}/groups/{self.project_id}', '*.json')
        files = glob(files_path)
        for file in files:
            base = os.path.splitext(os.path.basename(file))[0]
            tw_rc_link = f'rc://{self.lang_code}/tw/dict/bible/{group}/{base}'
            tw_group_data = load_json_object(file)
            for group_data in tw_group_data:
                chapter = str(group_data['contextId']['reference']['chapter'])
                verse = str(group_data['contextId']['reference']['verse'])
                group_data['contextId']['rc'] = tw_rc_link
                group_data['alignments'] = {
                    self.ult_id: self.get_aligned_text(self.ult_id, group_data['contextId']),
                    self.ust_id: self.get_aligned_text(self.ust_id, group_data['contextId'])
                }
                if chapter not in words_data:
                    words_data[chapter] = OrderedDict()
                if verse not in words_data[chapter]:
                    words_data[chapter][verse] = []
                words_data[chapter][verse].append(group_data)
    self.tw_words_data = words_data
def __init__(self, file_name=None):
    """
    Class constructor. Optionally accepts the name of a file to deserialize.

    :param str file_name: The name of a file to deserialize into a TAStatus object
    """
    if file_name:
        # Deserialize from disk; guard clause replaces the nested if/else.
        if not os.path.isfile(file_name):
            raise IOError('The file {0} was not found.'.format(file_name))
        self.__dict__ = load_json_object(file_name)
        return

    # Fresh, default status.
    self.checking_entity = ''
    self.checking_level = '1'
    self.comments = ''
    self.contributors = ''
    self.license = 'CC BY-SA 4.0'
    self.publish_date = datetime.today().strftime('%Y-%m-%d')
    self.source_text = 'en'
    self.source_text_version = ''
    self.version = ''
def test_bible_from_tx_pre_rc(self):
    """ Populates the ResourceContainer object and verifies the output."""
    # test with an Indonesian Matthew ULB repo in the pre-RC tS format
    zip_file = os.path.join(self.resources_dir, 'id_mat_text_ulb-ts.zip')
    self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
    unzip(zip_file, self.out_dir)
    repo_dir = os.path.join(self.out_dir, 'id_mat_text_ulb-ts')
    rc = RC(directory=repo_dir)
    rc.as_dict()
    # FIX: renamed the local from `json` so it no longer shadows the json module.
    manifest_json = load_json_object(os.path.join(repo_dir, 'manifest.json'))
    self.assertEqual(rc.resource.identifier, manifest_json['resource']['id'])
    self.assertEqual(rc.resource.type, 'book')
    self.assertEqual(rc.resource.format, 'text/{0}'.format(manifest_json['format']))
    self.assertEqual(rc.resource.file_ext, manifest_json['format'])
    self.assertEqual(rc.resource.conformsto, 'pre-rc')
    self.assertEqual(rc.resource.modified, datetime.utcnow().strftime('%Y-%m-%d'))
    chapters = rc.projects[0].chapters()
    self.assertEqual(len(chapters), 29)
    chunks = rc.projects[0].chunks('01')
    self.assertEqual(len(chunks), 11)
def test_en_obs_package_json(self):
    """ Populates the ResourceContainer object and verifies the output."""
    # Build an RC from an English OBS repo that carries only a package.json.
    zip_file = os.path.join(self.resources_dir, 'en-obs-package-json.zip')
    self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
    unzip(zip_file, self.out_dir)
    repo_dir = os.path.join(self.out_dir, 'en-obs')
    rc = RC(directory=repo_dir)
    rc.as_dict()

    # The RC fields must mirror what package.json declares.
    package_json = load_json_object(os.path.join(repo_dir, 'package.json'))
    self.assertEqual(rc.resource.identifier, package_json['resource']['slug'])
    self.assertEqual(rc.resource.type, 'book')
    self.assertEqual(rc.resource.format, package_json['content_mime_type'])
    self.assertEqual(rc.resource.file_ext, 'md')
    self.assertEqual(rc.resource.conformsto, 'pre-rc')
    self.assertEqual(rc.resource.issued, package_json['resource']['status']['pub_date'])

    self.assertEqual(len(rc.projects[0].chapters()), 2)
    self.assertEqual(rc.project().chunks('_back'), ['back-matter.md'])
import os

from general_tools.file_utils import load_json_object

# Mappings gathered from here:
# https://r12a.github.io/scripts (primary site, copied in languages from the "languages using" section of each script)
# https://www.google.com/get/noto/
# http://td.unfoldingword.org/uw/languages/
# https://www.monotype.com/resources/case-studies/more-than-800-languages-in-a-single-typeface-creating-noto-for-google

# Directory containing this module; the JSON data files live beside it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
noto_font_list_file = os.path.join(SCRIPT_DIR, 'noto_font_list.json')
font_fallbacks_file = os.path.join(SCRIPT_DIR, 'font_fallbacks.json')
font_by_lang_file = os.path.join(SCRIPT_DIR, 'fonts_by_lang.json')

# Font stack used when a language has no specific mapping.
DEFAULT_FALLBACK = ['Noto Sans', 'sans-serif']

# The three mapping tables are loaded once at import time.
FONT_FALLBACKS = load_json_object(font_fallbacks_file)
NOTO_FONT_LIST = load_json_object(noto_font_list_file)
FONTS_BY_LANG = load_json_object(font_by_lang_file)

# Some font-families need "Noto Sans" in front of it so Latin letters & numbers will show in Noto, such as CJK
PRECEDING_FONT_FAMILIES = {
    'Noto Sans JC': ['Noto Sans'],
    'Noto Sans SC': ['Noto Sans'],
    'Noto Sans TC': ['Noto Sans'],
}
def tests():
    """Ad-hoc manual exercises of get_alignment() against local resource files.

    Debug scaffolding, not an automated test: paths are hard-coded to one
    developer's machine, and the early `return` statements deliberately make
    only the first case run — everything after the first `return` is
    intentionally dead until the returns are moved/removed.
    """
    # TIT 1 8 xy12 figs-doublet δίκαιον, ὅσιον 1 righteous, holy
    group_data = load_json_object(
        '/Users/richmahn/working/resources/en/translationHelps/translationNotes/v23/figures/groups/tit/figs-doublet.json'
    )
    chapter_verse_objects = load_json_object(
        '/Users/richmahn/working/resources/en/bibles/ult/v8/tit/1.json')
    quote = group_data[1]["contextId"]["quote"]
    verse_objects = chapter_verse_objects["8"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote)
    print(alignments)
    return

    # --- unreachable below this point until the return above is removed ---

    # TIT 1 2 r2gj πρὸ χρόνων αἰωνίων 1 before all the ages of time
    chapter_verse_objects = load_json_object(
        '/Users/richmahn/working/resources/en/bibles/ult/v8/tit/1.json')
    quote = 'πρὸ χρόνων αἰωνίων'
    occurrence = 1
    verse_objects = chapter_verse_objects["2"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote, occurrence)
    print(alignments)
    return

    # NOTE(review): `string` is assigned but never used below — leftover debug value.
    string = 'בִּימֵי֙ שְׁפֹ֣ט הַשֹּׁפְטִ֔ים'
    group_data = load_json_object(
        '/Users/richmahn/working/resources/en/translationHelps/translationNotes/v23/other/groups/rut/grammar-connect-time-simultaneous.json'
    )
    chapter_verse_objects = load_json_object(
        '/Users/richmahn/working/resources/en/bibles/ult/v8/rut/1.json')
    quote = group_data[0]["contextId"]["quote"]
    verse_objects = chapter_verse_objects["1"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote)
    print(alignments)

    # RUT 4 22 abcd figs-explicit אֶת־דָּוִֽד 1 David
    group_data = load_json_object(
        '/Users/richmahn/working/resources/en/translationHelps/translationNotes/v23/culture/groups/rut/figs-explicit.json'
    )
    chapter_verse_objects = load_json_object(
        '/Users/richmahn/working/resources/en/bibles/ult/v8/rut/4.json')
    quote = group_data[12]["contextId"]["quote"]
    occurrence = group_data[12]["contextId"]["occurrence"]
    verse_objects = chapter_verse_objects["22"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote, occurrence)
    print(alignments)

    # RUT 4 17 f9ha figs-explicit אֲבִ֥י דָוִֽד 1 the father of David
    # (reuses group_data/chapter_verse_objects from the RUT 4 case above)
    quote = group_data[11]["contextId"]["quote"]
    occurrence = group_data[11]["contextId"]["occurrence"]
    verse_objects = chapter_verse_objects["17"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote, occurrence)
    print(alignments)

    # RUT 4 19 rl3k translate-names וְחֶצְרוֹן֙…עַמִּֽינָדָֽב׃ 1 Hezron…Amminadab
    group_data = load_json_object(
        '/Users/richmahn/working/resources/en/translationHelps/translationNotes/v23/culture/groups/rut/translate-names.json'
    )
    quote = group_data[-1]["contextId"]["quote"]
    occurrence = group_data[-1]["contextId"]["occurrence"]
    verse_objects = chapter_verse_objects["17"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote, occurrence)
    print(alignments)

    # RUT 1 4 aee6 שֵׁ֤ם הָֽאַחַת֙…וְשֵׁ֥ם הַשֵּׁנִ֖י 1 the name of the first woman was…and the name of the second woman was
    quote = 'שֵׁ֤ם הָֽאַחַת֙…וְשֵׁ֥ם הַשֵּׁנִ֖י'
    occurrence = 1
    chapter_verse_objects = load_json_object(
        '/Users/richmahn/working/resources/en/bibles/ult/v8/rut/1.json')
    verse_objects = chapter_verse_objects["4"]["verseObjects"]
    alignments = get_alignment(verse_objects, quote, occurrence)
    print(alignments)
def get_tw_checking_html(self):
    """Build the full HTML <section> for translationWords checking.

    Walks every tW group JSON under the latest tW version for this project,
    and for each tW article emits a table of occurrences: chapter:verse,
    ULT/UST alignment and marked scripture, and the original-language quote
    and marked scripture. Returns the assembled HTML string.

    Side effects: registers RC links via self.add_rc() / self.get_tw_article_html(),
    and mutates each group's contextId dict in place (adds 'rc', 'scripture',
    'alignments' keys). Calls exit(1) on missing tW resources.
    """
    # Section cover page for this resource/project.
    tw_html = f'''
<section id="{self.lang_code}-{self.name}-{self.project_id}" class="{self.name}">
    <article id="{self.lang_code}-{self.name}-{self.project_id}-cover" class="resource-title-page">
        <img src="{self.main_resource.logo_url}" class="logo" alt="UTW">
        <h1 class="section-header">{self.title}</h1>
        <h2 class="section-header">{self.project_title}</h2>
    </article>
'''
    tw_path = os.path.join(self.resources_dir, self.ol_lang_code,
                           'translationHelps/translationWords')
    # NOTE(review): os.path.join never returns an empty string, so this guard
    # can't fire — probably os.path.exists(tw_path) was intended. Left as-is.
    if not tw_path:
        self.logger.error(f'{tw_path} not found!')
        exit(1)
    tw_version_path = get_latest_version_path(tw_path)
    if not tw_version_path:
        self.logger.error(f'No versions found in {tw_path}!')
        exit(1)
    # tW terms are organized as <group>/groups/<project>/<term>.json
    groups = get_child_directories(tw_version_path)
    for group in groups:
        files_path = os.path.join(tw_version_path,
                                  f'{group}/groups/{self.project_id}', '*.json')
        files = glob(files_path)
        for file in files:
            base = os.path.splitext(os.path.basename(file))[0]
            tw_rc_link = f'rc://{self.lang_code}/tw/dict/bible/{group}/{base}'
            tw_rc = self.add_rc(tw_rc_link, title=base)
            # Pre-generates the article so the [[rc://…]] link resolves later.
            self.get_tw_article_html(tw_rc)
            # Per-article table header.
            tw_html += f'''
    <article id="{tw_rc.article_id}">
        <h3 class="section-header">[[{tw_rc.rc_link}]]</h3>
        <table width="100%">
            <tr>
                <th style="width:1px;padding:0 !important"></th>
                <th>Verse</th>
                <th>{self.ult_id.upper()} Alignment</th>
                <th>{self.ult_id.upper()} Text</th>
                <th>{self.ust_id.upper()} Alignment</th>
                <th>{self.ust_id.upper()} Text</th>
                <th>{self.ol_bible_id.upper()} Quote</th>
                <th>{self.ol_bible_id.upper()} Text</th>
            </tr>
'''
            tw_group_data = load_json_object(file)
            for group_data in tw_group_data:
                context_id = group_data['contextId']
                context_id['rc'] = tw_rc.rc_link
                chapter = str(context_id['reference']['chapter'])
                verse = str(context_id['reference']['verse'])
                context_id['scripture'] = {}
                context_id['alignments'] = {}
                # Gateway-language bibles: show alignment and marked scripture,
                # or a red placeholder when alignment/marking fails.
                for bible_id in [self.ult_id, self.ust_id]:
                    alignment = self.get_aligned_text(
                        bible_id, group_data['contextId'])
                    if alignment:
                        context_id['alignments'][
                            bible_id] = flatten_alignment(alignment)
                    else:
                        context_id['alignments'][
                            bible_id] = '<div style="color: red">NONE</div>'
                    scripture = self.get_plain_scripture(
                        bible_id, chapter, verse)
                    marked_html = None
                    if alignment:
                        marked_html = mark_phrases_in_html(
                            scripture, alignment)
                    if marked_html:
                        context_id['scripture'][bible_id] = marked_html
                    else:
                        context_id['scripture'][
                            bible_id] = f'<div style="color: red">{scripture}</div>'
                # Original-language bible: normalize the quote into a
                # multi-dimensional alignment before marking.
                scripture = self.get_plain_scripture(
                    self.ol_bible_id, chapter, verse)
                ol_alignment = context_id['quote']
                if isinstance(ol_alignment, str):
                    ol_alignment = split_string_into_alignment(
                        ol_alignment)
                if not isinstance(ol_alignment[0], list):
                    ol_alignment = convert_single_dimensional_quote_to_multidimensional(
                        ol_alignment)
                marked_html = mark_phrases_in_html(scripture, ol_alignment)
                if marked_html:
                    context_id['scripture'][self.ol_bible_id] = marked_html
                else:
                    context_id['scripture'][
                        self.
                        ol_bible_id] = f'<div style="color: red">{scripture}</div>'
                # NOTE(review): the <a> anchor in the first cell below is never
                # closed with </a> — browsers tolerate it, but it is malformed HTML.
                tw_html += f'''
            <tr id="{tw_rc.article_id}-{chapter}-{verse}">
                <td style="width:1px;padding:0 !important"><a href="#{tw_rc.article_id}-{chapter}-{verse}"><i class="fa fa-link"></i></td>
                <td>
                    {chapter}:{verse}
                </td>
                <td>
                    {context_id['alignments'][self.ult_id]}
                </td>
                <td>
                    {context_id['scripture'][self.ult_id]}
                </td>
                <td>
                    {context_id['alignments'][self.ust_id]}
                </td>
                <td>
                    {context_id['scripture'][self.ust_id]}
                </td>
                <td style="direction: {'rtl' if self.ol_lang_code == 'hbo' else 'ltr'}">
                    {flatten_alignment(ol_alignment)}
                </td>
                <td style="direction: {'rtl' if self.ol_lang_code == 'hbo' else 'ltr'}">
                    {context_id['scripture'][self.ol_bible_id]}
                </td>
            </tr>
'''
            tw_html += '''
        </table>
    </article>
'''
    tw_html += '''
</section>
'''
    self.logger.info('Done generating TW Checking HTML.')
    return tw_html
def run(self):
    """Assemble the full ConTeXt (.tex) document for an OBS language.

    Fetches front/back/body-matter JSON via self.get_json, converts each to
    TeX with self.export_matter / self.export, then streams the
    main_template.tex line by line, substituting the generated blocks at the
    marker lines and expanding <<<[var]>>> placeholders everywhere else.
    Writes the finished document to self.out_path.
    """
    # Matches "obs/tex/" after "{" or a space, so relative font paths in the
    # template can be rewritten to absolute paths under snippets_dir.
    relative_path_re = re.compile(r'([{ ])obs/tex/', re.UNICODE)
    # NOTE(review): rebinding sys.stdout affects the whole process, not just
    # this method — a Python 2-era UTF-8 workaround, presumably.
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)
    top_tmp_f = self.get_json(self.lang, 'obs-{0}-front-matter.json', '{0}-front-matter-json.tmp')
    bot_tmp_f = self.get_json(self.lang, 'obs-{0}-back-matter.json', '{0}-back-matter-json.tmp')

    lang_top_json = load_json_object(top_tmp_f, {})
    lang_bot_json = load_json_object(bot_tmp_f, {})

    # Parse the front and back matter
    front_matter = self.export_matter(lang_top_json['front-matter'], 0)

    # The front matter really has two parts, an "about" section and a "license" section
    # Sadly the API returns it as one blob, but we want to insert the checking level
    # indicator on between the two. Until such a time as the API returns these strings separately,
    # this is a hack to split them. Failing a match it should just put the whole thing in the first section
    # fm = re.split(r'\{\\\\bf.+:\s*\}\\n', front_matter)
    fm = re.split(r'\s(?=\{\\bf.+:\s*\})', front_matter)
    output_front_about = fm[0]
    if len(fm) > 1:
        output_front_license = ''.join(fm[1:])
    else:
        output_front_license = ''

    output_back = self.export_matter(lang_bot_json['back-matter'], 0)

    # Parse the body matter
    jsonf = 'obs-{0}.json'.format(self.lang)
    tmpf = self.get_json(self.lang, jsonf, '{0}-body-matter-json.tmp')
    self.body_json = load_json_object(tmpf, {})
    self.check_for_standard_keys_json()

    # Hacks to make up for missing localized strings
    if 'toctitle' not in self.body_json.keys():
        self.body_json['toctitle'] = OBSTexExport.extract_title_from_frontmatter(lang_top_json['front-matter'])

    output = self.export(self.body_json['chapters'], self.max_chapters, self.img_res, self.body_json['language'])

    # For ConTeXt files only, Read the "main_template.tex" file replacing
    # all <<<[anyvar]>>> with its definition from the body-matter JSON file
    outlist = []
    tex_template = os.path.join(OBSTexExport.snippets_dir, 'main_template.tex')
    if not os.path.exists(tex_template):
        print("Failed to get TeX template.")
        sys.exit(1)

    with codecs.open(tex_template, 'r', encoding='utf-8-sig') as in_file:
        template = in_file.read()

    # replace relative path to fonts with absolute
    template = relative_path_re.sub(r'\1{0}/'.format(OBSTexExport.snippets_dir), template)

    for single_line in template.splitlines():
        # Marker lines are replaced wholesale by the corresponding generated block.
        if OBSTexExport.matchChaptersPat.search(single_line):
            outlist.append(output)
        elif OBSTexExport.matchFrontMatterAboutPat.search(single_line):
            outlist.append(output_front_about)
        elif OBSTexExport.matchFrontMatterlicensePat.search(single_line):
            outlist.append(output_front_license)
        elif OBSTexExport.matchBackMatterPat.search(single_line):
            outlist.append(output_back)
        else:
            # Repeatedly expand placeholders until none remain, since a
            # substitution may itself introduce new placeholders.
            occurs = 1
            while occurs > 0:
                (single_line, occurs) \
                    = OBSTexExport.matchMiscPat.subn(self.another_replace, single_line, OBSTexExport.MATCH_ALL)
            outlist.append(single_line)
    full_output = '\n'.join(outlist)
    write_file(self.out_path, full_output)
def test_load_json_object(self):
    """Round-trip check: a dict dumped to a temp file is read back intact."""
    expected = {"one": 1, "two": 2, "child": {"three": 3}}
    # Keep the path on self so tearDown can clean the temp file up.
    _, self.tmp_file = tempfile.mkstemp(prefix='Door43_test_')
    with open(self.tmp_file, "w") as handle:
        json.dump(expected, handle)
    loaded = file_utils.load_json_object(self.tmp_file)
    self.assertEqual(loaded, expected)
'The tools directory was not found. The PDF cannot be generated.' ) resp = prompt( 'Do you want to continue without generating a PDF? [Y|n]: ') if resp != '' and resp != 'Y' and resp != 'y': sys.exit(0) today = ''.join(str(datetime.date.today()).rsplit('-')[0:3]) print('Loading languages...', end=' ') lang_dict = OBS.load_lang_strings() print('finished.') print('Loading the catalog...', end=' ') uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json') uw_catalog = load_json_object(uw_cat_path, []) uw_cat_langs = [x['language'] for x in uw_catalog] cat_path = os.path.join(export_dir, 'obs-catalog.json') catalog = load_json_object(cat_path, []) print('finished') if 'obs' not in os.listdir(os.path.join(pages, lang)): print('OBS not configured in Door43 for {0}'.format(lang)) sys.exit(1) print('Getting metadata...', end=' ') app_words = get_json_dict(os.path.join(pages, lang, 'obs/app_words.txt')) lang_direction = 'ltr' if lang in rtl: lang_direction = 'rtl' obs_obj = OBS()
def run(self):
    """Download an OBS git repo, convert its 50 markdown chapters to HTML.

    Clones-by-archive the repo at self.source_repo_url (master.zip), unzips
    it into a fresh temp dir, reads its manifest.json, then renders
    content/01.md … content/50.md through the local template.html into
    self.output_directory. Errors are collected in self.errors.
    """
    try:
        self.temp_dir = tempfile.mkdtemp(prefix='txOBS_')

        # clean up the git repo url
        if self.source_repo_url[-4:] == '.git':
            self.source_repo_url = self.source_repo_url[:-4]
        if self.source_repo_url[-1:] == '/':
            self.source_repo_url = self.source_repo_url[:-1]

        # download the archive
        file_to_download = join_url_parts(self.source_repo_url, 'archive/master.zip')
        repo_dir = self.source_repo_url.rpartition('/')[2]
        downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
        try:
            print('Downloading {0}...'.format(file_to_download), end=' ')
            if not os.path.isfile(downloaded_file):
                download_file(file_to_download, downloaded_file)
        finally:
            print('finished.')

        # unzip the archive
        try:
            # FIX: the format string previously had no placeholder, so the
            # file name was silently dropped from the message.
            print('Unzipping {0}...'.format(downloaded_file), end=' ')
            unzip(downloaded_file, self.temp_dir)
        finally:
            print('finished.')

        # get the manifest
        # NOTE: loaded for validation; the value itself is not used below.
        try:
            print('Reading the manifest...', end=' ')
            manifest = load_json_object(os.path.join(self.temp_dir, 'manifest.json'))
        finally:
            print('finished.')

        # create output directory
        make_dir(self.output_directory)

        # read the markdown files and output html files
        try:
            print('Processing the OBS markdown files')

            # chapters are named 01.md through 50.md
            files_to_process = [str(i).zfill(2) + '.md' for i in range(1, 51)]

            current_dir = os.path.dirname(inspect.stack()[0][1])
            with codecs.open(os.path.join(current_dir, 'template.html'), 'r', 'utf-8-sig') as html_file:
                html_template = html_file.read()

            for file_to_process in files_to_process:
                # read the markdown file
                file_name = os.path.join(self.temp_dir, repo_dir, 'content', file_to_process)
                with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                    md = md_file.read()
                html = markdown.markdown(md)
                html = TransformOBS.dir_re.sub(r'\1\n' + html + r'\n\2', html_template)
                write_file(os.path.join(self.output_directory, file_to_process.replace('.md', '.html')), html)

        except IOError as ioe:
            print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
            self.errors.append(ioe)

        except Exception as e:
            # FIX: Python 3 exceptions have no .message attribute — using it
            # raised AttributeError inside the handler and masked the real error.
            print_error(str(e))
            self.errors.append(e)

        finally:
            print('finished.')

    except Exception as e:
        # FIX: str(e) instead of the Python-2-only e.message (see above).
        print_error(str(e))
        self.errors.append(e)
def main(git_repo, tag, no_pdf):
    """Publish an OBS repo: download, validate, export JSON, update catalogs, PDF.

    :param str git_repo: URL of the git repository (trailing '.git' or '/' tolerated)
    :param str tag: branch or tag name to download ('archive/<tag>.zip')
    :param bool no_pdf: when truthy, skip the final PDF generation
    """
    global download_dir

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]
    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    # YYYYMMDD stamp: rsplit('-') on an ISO date then join the pieces.
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # str(datetime.date.today())
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    status = None  # type: OBSStatus
    content_dir = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # NOTE(review): this format string has no placeholder, so
        # downloaded_file is silently dropped from the message.
        print('Unzipping...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                content_dir = root
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
            finally:
                print('finished.')

        if 'status.json' in files:
            # read the meta data
            try:
                print('Reading the status...', end=' ')
                content_dir = root
                status = OBSStatus(os.path.join(root, 'status.json'))
            finally:
                print('finished.')

        # if we have everything, exit the loop
        if content_dir and manifest and status:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    if not status:
        print_error('Did not find status.json in {}'.format(git_repo))
        sys.exit(1)

    print('Initializing OBS object...', end=' ')
    lang = manifest['target_language']['id']
    obs_obj = OBS()
    obs_obj.date_modified = today
    obs_obj.direction = manifest['target_language']['direction']
    obs_obj.language = lang
    print('finished')

    obs_obj.chapters = load_obs_chapters(content_dir)
    # NOTE(review): sorts by c['number'] as-is; if 'number' is a string this is
    # a lexicographic sort (a newer variant of this function uses int(c['number'])).
    obs_obj.chapters.sort(key=lambda c: c['number'])

    if not obs_obj.verify_all():
        print_error('Quality check did not pass.')
        sys.exit(1)

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    export_dir = '/var/www/vhosts/door43.org/httpdocs/exports'
    # uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    # uw_catalog = load_json_object(uw_cat_path, [])
    # uw_cat_langs = [x['language'] for x in uw_catalog]
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    print('Getting already published languages...', end=' ')
    json_lang_file_path = os.path.join(export_dir, lang, 'obs', 'obs-{0}.json'.format(lang))
    # prev_json_lang = load_json_object(json_lang_file_path, {})

    if lang not in lang_dict:
        print("Configuration for language {0} missing.".format(lang))
        sys.exit(1)
    print('finished.')

    updated = update_language_catalog(lang, obs_obj.direction, status, today, lang_dict, catalog)

    print('Writing the OBS file to the exports directory...', end=' ')
    cur_json = json.dumps(obs_obj, sort_keys=True, cls=OBSEncoder)

    if updated:
        # stamp the catalog entry for this language with today's date
        ([x for x in catalog if x['language'] == lang][0]['date_modified']) = today
    write_file(json_lang_file_path.replace('.txt', '.json'), cur_json)
    print('finished.')

    export_to_api(lang, status, today, cur_json)

    cat_json = json.dumps(catalog, sort_keys=True, cls=OBSEncoder)
    write_file(cat_path, cat_json)

    # update the catalog
    print_ok('STARTING: ', 'updating the catalogs.')
    update_catalog()
    print_ok('FINISHED: ', 'updating the catalogs.')

    if no_pdf:
        return

    create_pdf(lang, status.checking_level, status.version)
def main(git_repo, tag, no_pdf):
    """Publish an OBS repo (RC-manifest layout): download, validate, export, catalog, PDF.

    Unlike the older variant, the status comes from the manifest itself
    (OBSStatus.from_manifest) and the chapters live under a 'content' dir.

    :param str git_repo: URL of the git repository (trailing '.git' or '/' tolerated)
    :param str tag: branch or tag name to download ('archive/<tag>.zip')
    :param bool no_pdf: when truthy, skip the final PDF generation
    """
    global download_dir

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]
    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    # YYYYMMDD stamp (FIX: dropped the pointless str('-') wrapper around the separator)
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # str(datetime.date.today())
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/{0}.zip'.format(tag))
    manifest = None
    status = None  # type: OBSStatus
    content_dir = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # FIX: the format string previously had no placeholder, so the
        # file name was silently dropped from the message.
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'manifest.json' in files:
            # read the manifest; the publishing status is derived from it
            try:
                print('Reading the manifest...', end=' ')
                content_dir = root
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
                status = OBSStatus.from_manifest(manifest)
            finally:
                print('finished.')

        if 'content' in dirs:
            content_dir = os.path.join(root, 'content')

        # if we have everything, exit the loop
        if content_dir and manifest and status:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    print('Initializing OBS object...', end=' ')
    lang = manifest['language']['slug']
    obs_obj = OBS()
    obs_obj.date_modified = today
    obs_obj.direction = manifest['language']['dir']
    obs_obj.language = lang
    print('finished')

    obs_obj.chapters = load_obs_chapters(content_dir)
    # numeric sort — chapter 'number' values are strings like '01'
    obs_obj.chapters.sort(key=lambda c: int(c['number']))

    if not obs_obj.verify_all():
        print_error('Quality check did not pass.')
        sys.exit(1)

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    export_dir = '/var/www/vhosts/door43.org/httpdocs/exports'
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    print('Getting already published languages...', end=' ')
    json_lang_file_path = os.path.join(export_dir, lang, 'obs', 'obs-{0}.json'.format(lang))

    if lang not in lang_dict:
        print("Configuration for language {0} missing.".format(lang))
        sys.exit(1)
    print('finished.')

    updated = update_language_catalog(lang, obs_obj.direction, status, today, lang_dict, catalog)

    print('Writing the OBS file to the exports directory...', end=' ')
    cur_json = json.dumps(obs_obj, sort_keys=True, cls=OBSEncoder)

    if updated:
        # stamp the catalog entry for this language with today's date
        ([x for x in catalog if x['language'] == lang][0]['date_modified']) = today
    # noinspection PyTypeChecker
    write_file(json_lang_file_path.replace('.txt', '.json'), cur_json)
    print('finished.')

    export_to_api(lang, status, today, cur_json)

    cat_json = json.dumps(catalog, sort_keys=True, cls=OBSEncoder)
    write_file(cat_path, cat_json)

    # update the catalog
    print_ok('STARTING: ', 'updating the catalogs.')
    update_catalog()
    print_ok('FINISHED: ', 'updating the catalogs.')

    if no_pdf:
        return

    create_pdf(lang, status.checking_level, status.version)
def run(self):
    """Download an OBS repo from git.door43.org and render its markdown to HTML.

    Same pipeline as the non-quiet variant: fetch archive/master.zip into
    self.temp_dir, unzip, read manifest.json, then convert content/01.md …
    content/50.md via template.html into self.output_directory. All console
    output is suppressed when self.quiet is truthy. Errors collect in
    self.errors.
    """
    if 'git.door43.org' not in self.source_repo_url:
        print_warning('Currently only git.door43.org repositories are supported.')
        sys.exit(0)

    try:
        # clean up the git repo url
        if self.source_repo_url[-4:] == '.git':
            self.source_repo_url = self.source_repo_url[:-4]
        if self.source_repo_url[-1:] == '/':
            self.source_repo_url = self.source_repo_url[:-1]

        # download the archive
        file_to_download = join_url_parts(self.source_repo_url, 'archive/master.zip')
        repo_dir = self.source_repo_url.rpartition('/')[2]
        downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
        try:
            if not self.quiet:
                print('Downloading {0}...'.format(file_to_download), end=' ')
            if not os.path.isfile(downloaded_file):
                download_file(file_to_download, downloaded_file)
        finally:
            if not self.quiet:
                print('finished.')

        # unzip the archive
        try:
            if not self.quiet:
                # FIX: the format string previously had no placeholder, so the
                # file name was silently dropped from the message.
                print('Unzipping {0}...'.format(downloaded_file), end=' ')
            unzip(downloaded_file, self.temp_dir)
        finally:
            if not self.quiet:
                print('finished.')

        # get the manifest
        # NOTE: loaded for validation; the value itself is not used below.
        try:
            if not self.quiet:
                print('Reading the manifest...', end=' ')
            manifest = load_json_object(
                os.path.join(self.temp_dir, 'manifest.json'))
        finally:
            if not self.quiet:
                print('finished.')

        # create output directory
        make_dir(self.output_directory)

        # read the markdown files and output html files
        try:
            if not self.quiet:
                print('Processing the OBS markdown files')

            # chapters are named 01.md through 50.md
            files_to_process = [str(i).zfill(2) + '.md' for i in range(1, 51)]

            current_dir = os.path.dirname(inspect.stack()[0][1])
            with codecs.open(os.path.join(current_dir, 'template.html'), 'r', 'utf-8-sig') as html_file:
                html_template = html_file.read()

            for file_to_process in files_to_process:
                # read the markdown file
                file_name = os.path.join(self.temp_dir, repo_dir, 'content', file_to_process)
                with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                    md = md_file.read()
                html = markdown.markdown(md)
                html = TransformOBS.dir_re.sub(r'\1\n' + html + r'\n\2', html_template)
                write_file(
                    os.path.join(self.output_directory, file_to_process.replace('.md', '.html')), html)

        except IOError as ioe:
            print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
            self.errors.append(ioe)

        except Exception as e:
            # FIX: Python 3 exceptions have no .message attribute — using it
            # raised AttributeError inside the handler and masked the real error.
            print_error(str(e))
            self.errors.append(e)

        finally:
            if not self.quiet:
                print('finished.')

    except Exception as e:
        # FIX: str(e) instead of the Python-2-only e.message (see above).
        print_error(str(e))
        self.errors.append(e)
# Script body: prepare catalogs and metadata before building an OBS object.
# Relies on names defined earlier in the script (tools_dir, unfoldingWord_dir,
# export_dir, pages, lang, rtl, prompt, print_notice, get_json_dict, OBS).

# Without the tools checkout the PDF step can't run — offer to continue anyway.
if not os.path.isdir(tools_dir):
    tools_dir = None
    print_notice('The tools directory was not found. The PDF cannot be generated.')
    resp = prompt('Do you want to continue without generating a PDF? [Y|n]: ')
    if resp != '' and resp != 'Y' and resp != 'y':
        sys.exit(0)

# YYYYMMDD stamp from today's ISO date.
today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])

print('Loading languages...', end=' ')
lang_dict = OBS.load_lang_strings()
print('finished.')

print('Loading the catalog...', end=' ')
# unfoldingWord catalog plus the door43 export catalog (both default to []).
uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
uw_catalog = load_json_object(uw_cat_path, [])
uw_cat_langs = [x['language'] for x in uw_catalog]
cat_path = os.path.join(export_dir, 'obs-catalog.json')
catalog = load_json_object(cat_path, [])
print('finished')

if 'obs' not in os.listdir(os.path.join(pages, lang)):
    print('OBS not configured in Door43 for {0}'.format(lang))
    sys.exit(1)

print('Getting metadata...', end=' ')
app_words = get_json_dict(os.path.join(pages, lang, 'obs/app_words.txt'))
lang_direction = 'ltr'
if lang in rtl:
    lang_direction = 'rtl'

obs_obj = OBS()
def main(git_repo, tag, domain):
    """Publish a Bible book repo: download, read USFM, check, chunk, write, API.

    :param str git_repo: URL of the git repository (trailing '.git' or '/' tolerated)
    :param str tag: branch or tag name to download ('archive/<tag>.zip')
    :param str domain: first substitution into the module-level out_template path
    """
    global download_dir, out_template

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]
    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    # YYYYMMDD stamp from today's ISO date.
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # str(datetime.date.today())
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    metadata_obj = None
    content_dir = ''
    usfm_file = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # NOTE(review): this format string has no placeholder, so
        # downloaded_file is silently dropped from the message.
        print('Unzipping...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
                content_dir = root

                # look for the usfm file for the whole book
                # NOTE(review): glob() already returns paths prefixed with
                # content_dir; joining content_dir again only works because
                # os.path.join discards the left side for absolute paths —
                # confirm before touching this.
                found_usfm = glob(os.path.join(content_dir, '*.usfm'))
                if len(found_usfm) == 1:
                    usfm_file = os.path.join(content_dir, found_usfm[0])
            finally:
                print('finished.')

        if 'meta.json' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(root, 'meta.json'))
            finally:
                print('finished.')

        # if we have everything, exit the loop
        if manifest and metadata_obj:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)

    # get the versification data
    print('Getting versification info...', end=' ')
    vrs = Bible.get_versification(metadata_obj.versification)  # type: list<Book>

    # get the book object for this repository
    book = next((b for b in vrs if b.book_id.lower() == manifest['project']['id']), None)  # type: Book
    if not book:
        print_error('Book versification data was not found for "{}"'.format(manifest['project']['id']))
        sys.exit(1)
    print('finished')

    # a single whole-book USFM file wins over per-chunk files
    if usfm_file:
        read_unified_file(book, usfm_file)
    else:
        read_chunked_files(book, content_dir, metadata_obj)

    # do basic checks
    print('Running USFM checks...', end=' ')
    book.verify_chapters_and_verses(True)
    if book.validation_errors:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)
    else:
        print('finished.')

    # insert paragraph markers
    print('Inserting paragraph markers...', end=' ')
    Bible.insert_paragraph_markers(book)
    print('finished.')

    # get chunks for this book
    print('Chunking the text...', end=' ')
    Bible.chunk_book(metadata_obj.versification, book)
    book.apply_chunks()
    print('finished.')

    # save the output
    out_dir = out_template.format(domain, metadata_obj.slug)

    # produces something like '01-GEN.usfm'
    book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
    print('Writing ' + book_file_name + '...', end=' ')
    write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)
    print('finished.')

    # look for an existing status.json file
    print('Updating the status for {0}...'.format(metadata_obj.lang), end=' ')
    status_file = '{0}/status.json'.format(out_dir)
    if os.path.isfile(status_file):
        status = BibleStatus(status_file)
    else:
        status = BibleStatus()

    status.update_from_meta_data(metadata_obj)

    # add this book to the list of "books_published"
    status.add_book_published(book)

    # update the "date_modified"
    status.date_modified = today
    print('finished.')

    # save the status.json file
    print('Writing status.json...', end=' ')
    status_json = json.dumps(status, sort_keys=True, indent=2, cls=BibleEncoder)
    write_file(status_file, status_json)
    print('finished')

    # let the API know it is there
    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))
def load_static_json_file(file_name):
    """Load a JSON file from the app's static directory.

    :param str file_name: file name relative to the static directory
    :return: the parsed JSON object, or {} when loading fails
    """
    full_path = os.path.join(app_utils.get_static_dir(), file_name)
    return load_json_object(full_path, {})