def check_yaml_values(self, yaml_data):
    """
    Validate the required yaml front-matter values for a tA article.

    :param dict yaml_data: parsed yaml values for the page
    :return: True if all required values are valid, otherwise False
    """
    return_val = True

    # check the required yaml values
    if not TAArticle.check_value_is_valid_int('volume', yaml_data):
        print_error('Volume value is not valid.')
        return_val = False

    if not self.check_value_is_valid_string('manual', yaml_data):
        print_error('Manual value is not valid.')
        return_val = False

    if not self.check_value_is_valid_string('slug', yaml_data):
        # fixed: previously reported "Volume value is not valid." here
        print_error('Slug value is not valid.')
        return_val = False
    else:
        # slug cannot contain a dash, only underscores
        test_slug = str(yaml_data['slug']).strip()
        if '-' in test_slug:
            print_error('Slug values cannot contain hyphen (dash).')
            return_val = False

    if not self.check_value_is_valid_string('title', yaml_data):
        print_error('Title value is not valid.')
        return_val = False

    return return_val
def check_missing_pages(toc_obj, content_dir):
    """
    Verify that the TOC and the content directory agree with each other.

    Pages on disk that are not referenced by the TOC only produce a warning
    and a confirmation prompt; TOC entries with no page on disk are fatal.
    """
    toc_slugs = set(toc_obj.all_slugs())
    page_slugs = set(get_all_page_slugs(content_dir))

    missing_files = list(toc_slugs - page_slugs)
    orphan_pages = list(page_slugs - toc_slugs)

    if orphan_pages:
        print_warning('The following pages are in the content directory but not in the TOC:')
        for slug in orphan_pages:
            print('- ' + slug)
        print()
        print('If you continue these pages will NOT be included in the published product.')
        print()

        # default answer is yes; any explicit non-'y' answer aborts
        resp = prompt('Do you want to continue with the data as it is? [Y|n]: ')
        if resp != '' and resp[0:1].lower() != 'y':
            sys.exit(0)

    if missing_files:
        print_error('The following pages are in the TOC but were not found in the content directory:')
        for slug in missing_files:
            print('- ' + slug)
        print()
        sys.exit(1)
def get_q_and_a(text):
    """
    Parse translation-question (tQ) markup into question/answer dicts.

    :param str|unicode text: the raw page text
    :return: list of dicts with 'q', 'a' and 'ref' keys
    """
    cq = []
    first_line = None
    item = None  # the question currently waiting for its answer

    for line in text.splitlines():
        line = line.strip()

        # remember the page heading so error messages can identify the page
        if not first_line and line.startswith('#'):
            first_line = line
            continue

        # skip blank lines and known non-question markup
        if line.startswith('\n') or \
                line == '' or \
                line.startswith('~~') or \
                line.startswith('#') or \
                line.startswith('{{') or \
                line.startswith('__[') or \
                line.startswith('These questions will'):
            continue

        if q_re.search(line):
            item = {'q': q_re.search(line).group(1).strip()}
        elif a_re.search(line):
            # an answer with no preceding question is malformed markup;
            # previously this raised UnboundLocalError and crashed the run
            if item is None:
                print_error('tQ error in {0}: {1}'.format(first_line, line))
                continue
            item['a'] = a_re.search(line).group(1).strip()
            item['ref'] = fix_refs(ref_re.findall(item['a']))
            item['a'] = item['a'].split('[')[0].strip()
            cq.append(item)
            # reset so a stray second answer line is reported, not re-appended
            item = None
            continue
        else:
            print_error('tQ error in {0}: {1}'.format(first_line, line))

    return cq
def load_pages(self, content_dir):
    """Read every page listed in the TOC and append a TAArticle for each."""
    for slug in self.toc.all_slugs():
        print('Processing {0}...'.format(slug), end=' ')

        md_path = os.path.join(content_dir, slug + '.md')
        with codecs.open(md_path, 'r', 'utf-8-sig') as md_file:
            page_text = md_file.read()

        page = TAArticle(page_text, slug)
        if not page.yaml:
            print_error('No yaml data found for ' + slug)

        self.articles.append(page)
        print('finished.')
def main(directory_to_check, versification):
    """
    Run basic chapter/verse checks on every USFM file in a directory.

    :param str|unicode directory_to_check:
    :param str|unicode versification:
    """
    # get the versification data
    vrs = Bible.get_versification(versification)  # type: list<Book>

    # gather the usfm files
    usfm_files = []
    for pattern in ('*.usfm', '*.sfm', '*.SFM'):
        usfm_files.extend(glob(os.path.join(directory_to_check, pattern)))

    errors_found = False
    for usfm_file in usfm_files:

        # read the file
        with codecs.open(usfm_file, 'r', 'utf-8') as in_file:
            book_text = in_file.read()

        # get the book id
        book_search = id_re.search(book_text)
        if not book_search:
            print_error('Book id not found in {}'.format(usfm_file))
            sys.exit(1)
        book_id = book_search.group(1)

        print('Beginning {}...'.format(book_id), end=' ')

        # get book versification info
        book = next((b for b in vrs if b.book_id == book_id), None)
        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # strip \s5 section markers, then load the usfm for the book
        book.set_usfm(s5_re.sub('', book_text))

        # do basic checks
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            errors_found = True

        print('finished.')

    # stop if errors were found
    if errors_found:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)
def check_value_is_valid_int(value_to_check, yaml_data):
    """
    Check that a yaml value exists, is not blank, and can be read as an int.

    :param str value_to_check: the yaml key to check
    :param dict yaml_data: the parsed yaml values
    :return: True if the value can be interpreted as an integer
    """
    if value_to_check not in yaml_data:
        print_error('"' + value_to_check + '" data value for page is missing')
        return False

    # NOTE: a value of 0 is also reported as blank here (falsy check)
    if not yaml_data[value_to_check]:
        print_error('"' + value_to_check + '" data value for page is blank')
        return False

    data_value = yaml_data[value_to_check]
    if isinstance(data_value, int):
        return True

    # coerce strings like '7'; the bare except clauses previously used here
    # could hide unrelated errors (e.g. KeyboardInterrupt)
    try:
        int(data_value)
        return True
    except (TypeError, ValueError):
        pass

    # also accept values like '7.9' by going through float first;
    # OverflowError covers inputs such as 'inf'
    try:
        int(float(data_value))
        return True
    except (TypeError, ValueError, OverflowError):
        return False
def check_missing_pages(toc_obj, content_dir):
    """
    Cross-check the slugs in the TOC against the pages in the content directory.

    :param toc_obj: object exposing all_slugs() -- presumably the table of contents
    :param str content_dir: directory containing the page files
    """
    toc_slugs = toc_obj.all_slugs()
    page_slugs = get_all_page_slugs(content_dir)

    # in the TOC but with no page on disk (fatal below)
    not_in_pages = list(set(toc_slugs) - set(page_slugs))
    # on disk but not referenced by the TOC (warning + prompt below)
    not_in_toc = list(set(page_slugs) - set(toc_slugs))

    if not_in_toc:
        print_warning('The following pages are in the content directory but not in the TOC:')
        for item in not_in_toc:
            print('- ' + item)
        print()
        print('If you continue these pages will NOT be included in the published product.')
        print()
        # default answer is yes: only an explicit non-'y' answer aborts
        resp = prompt('Do you want to continue with the data as it is? [Y|n]: ')
        if resp != '' and resp[0:1].lower() != 'y':
            sys.exit(0)

    if not_in_pages:
        print_error('The following pages are in the TOC but were not found in the content directory:')
        for item in not_in_pages:
            print('- ' + item)
        print()
        sys.exit(1)
def main(directory_to_check, versification): """ :param str|unicode directory_to_check: :param str|unicode versification: """ # get the versification data vrs = Bible.get_versification(versification) # type: list<Book> # walk through the usfm files patterns = ['*.usfm', '*.sfm', '*.SFM'] usfm_files = [] for pattern in patterns: usfm_files.extend(glob(os.path.join(directory_to_check, pattern))) errors_found = False for usfm_file in usfm_files: # read the file with codecs.open(usfm_file, 'r', 'utf-8') as in_file: book_text = in_file.read() # get the book id book_search = id_re.search(book_text) if not book_search: print_error('Book id not found in {}'.format(usfm_file)) sys.exit(1) book_id = book_search.group(1) print('Beginning {}...'.format(book_id), end=' ') # get book versification info book = next((b for b in vrs if b.book_id == book_id), None) if not book: print_error('Book versification data was not found for "{}"'.format(book_id)) sys.exit(1) # remove \s5 lines book_text = s5_re.sub('', book_text) # get the usfm for the book book.set_usfm(book_text) # do basic checks book.verify_chapters_and_verses(True) if book.validation_errors: errors_found = True print('finished.') # stop if errors were found if errors_found: print_error('These USFM errors must be corrected before publishing can continue.') sys.exit(1)
def export_to_api(lang, status, today, cur_json):
    """
    Export an OBS language to the unfoldingWord API and update the catalog.

    :param str lang: language code being exported
    :param status: status object with checking_level and publish_date
    :param today: date stamp passed to the front/back matter helpers
    :param cur_json: current OBS content handed to export_unfolding_word
    """
    global unfoldingWord_dir, lang_cat, github_org, pages

    print('Getting Github credentials...', end=' ')
    try:
        github_org = None
        if os.path.isfile('/root/.github_pass'):
            # noinspection PyTypeChecker
            pw = open('/root/.github_pass', 'r').read().strip()
            g_user = githubLogin('dsm-git', pw)
            github_org = getGithubOrg('unfoldingword', g_user)
        else:
            # no credentials file: continue with github_org = None
            print('none found...', end=' ')
    except GithubException as e:
        print_error('Problem logging into Github: {0}'.format(e))
        sys.exit(1)
    print('finished.')

    print('Loading the uw catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    # parallel list of language codes, used to locate entries for replacement
    uw_cat_langs = [x['language'] for x in uw_catalog]
    print('finished')

    unfolding_word_lang_dir = os.path.join(unfoldingWord_dir, lang)
    if 'checking_level' in status and 'publish_date' in status:
        if status.checking_level in ['1', '2', '3']:
            # NOTE(review): membership uses `in status` but the value is read
            # as an attribute -- status presumably supports both; confirm
            # against the status class definition
            front_json = OBS.get_front_matter(pages, lang, today)
            back_json = OBS.get_back_matter(pages, lang, today)
            print('Exporting {0}...'.format(lang), end=' ')
            export_unfolding_word(status, unfolding_word_lang_dir, cur_json, lang, github_org, front_json, back_json)

            # replace any existing catalog entry for this language
            if lang in uw_cat_langs:
                uw_catalog.pop(uw_cat_langs.index(lang))
                uw_cat_langs.pop(uw_cat_langs.index(lang))
            uw_catalog.append(lang_cat)

            uw_cat_json = json.dumps(uw_catalog, sort_keys=True, cls=OBSEncoder)
            write_file(uw_cat_path, uw_cat_json)

            # update uw_admin status page
            ObsPublishedLangs.update_page(ObsPublishedLangs.cat_url, ObsPublishedLangs.uw_stat_page)

            print('finished.')
        else:
            print_error('The `checking_level` is invalid.')
            sys.exit(1)
    else:
        print_error('The status is missing `checking_level` or `publish_date`.')
        sys.exit(1)
def read_chunked_files(book, content_dir, metadata_obj):
    """
    Assemble a book's USFM from per-chapter chunk files.

    Directory 00 holds the translated book title; directories 01..NN hold the
    chunk files for each chapter. Exits with status 1 on any missing file.

    :param book: Book object to populate (name, header_usfm, chapters)
    :param str content_dir: directory containing the zero-padded chapter dirs
    :param metadata_obj: metadata object providing the Bible name
    """
    print('Reading chapter USFM files...', end=' ')

    # range is len+1 because directory 00 (the title) is extra
    for i in range(0, len(book.chapters) + 1):

        # get the directory for this chapter
        chapter_dir = os.path.join(content_dir, str(i).zfill(2))
        if not os.path.isdir(chapter_dir):
            print_error('Did not find directory for chapter {}.'.format(i))
            sys.exit(1)

        # directory 00 contains the translated book title
        if i == 0:
            file_name = os.path.join(chapter_dir, 'title.txt')
            if not os.path.isfile(file_name):
                print_error('Did not find file "{}".'.format(file_name))
                sys.exit(1)

            with codecs.open(file_name, 'r', 'utf-8-sig') as in_file:
                translated_name = in_file.read()

            # fill the USFM header template with this book's values
            header_usfm = Bible.get_header_text()
            header_usfm = header_usfm.replace('{BOOK_CODE}', book.book_id)
            header_usfm = header_usfm.replace('{BIBLE_NAME}', metadata_obj.name)
            header_usfm = header_usfm.replace('{BOOK_NAME_SHORT}', translated_name)
            header_usfm = header_usfm.replace('{BOOK_NAME_LONG}', translated_name)

            book.name = translated_name
            book.header_usfm = header_usfm
        else:
            # other directories will have the chunk files for the chapter
            # NOTE(review): if no chapter has this number, `chapter` is None
            # and the += below raises -- assumes chapters are numbered 1..N
            chapter = next((c for c in book.chapters if c.number == i), None)  # type: Chapter

            # chunk files are 2- or 3-digit names like 01.txt / 001.txt
            chunk_list = [f for f in os.listdir(chapter_dir) if re.search(r'[0-1]?[0-9][0-9]\.txt$', f)]
            chunk_list.sort()

            for chunk_file in chunk_list:

                # skip the junk chunk in the last chapter
                if chunk_file == '00.txt' or chunk_file == '000.txt':
                    continue

                file_name = os.path.join(chapter_dir, chunk_file)
                if not os.path.isfile(file_name):
                    print_error('Did not find file "{}".'.format(file_name))
                    sys.exit(1)

                with codecs.open(file_name, 'r', 'utf-8-sig') as in_file:
                    chunk_usfm = in_file.read()

                chapter.usfm += reformat_usfm(remove_chapter_markers(chunk_usfm)) + "\n"

    book.build_usfm_from_chapters()
    print('finished.')
def export_to_api(lang, status, today, cur_json):
    """
    Export an OBS language to the unfoldingWord API and update the catalog.

    :param str lang: language code being exported
    :param status: status object with checking_level and publish_date
    :param today: date stamp passed to the front/back matter helpers
    :param cur_json: current OBS content handed to export_unfolding_word
    """
    global unfoldingWord_dir, lang_cat, github_org, pages

    print('Getting Github credentials...', end=' ')
    try:
        github_org = None
        if os.path.isfile('/root/.github_pass'):
            # noinspection PyTypeChecker
            pw = open('/root/.github_pass', 'r').read().strip()
            g_user = githubLogin('dsm-git', pw)
            github_org = getGithubOrg('unfoldingword', g_user)
        else:
            # no credentials file: continue with github_org = None
            print('none found...', end=' ')
    except GithubException as e:
        print_error('Problem logging into Github: {0}'.format(e))
        sys.exit(1)
    print('finished.')

    print('Loading the uw catalog...', end=' ')
    uw_cat_path = os.path.join(unfoldingWord_dir, 'obs-catalog.json')
    uw_catalog = load_json_object(uw_cat_path, [])
    # parallel list of language codes, used to locate entries for replacement
    uw_cat_langs = [x['language'] for x in uw_catalog]
    print('finished')

    unfolding_word_lang_dir = os.path.join(unfoldingWord_dir, lang)
    if 'checking_level' in status and 'publish_date' in status:
        if status.checking_level in ['1', '2', '3']:
            # NOTE(review): membership uses `in status` but the value is read
            # as an attribute -- status presumably supports both; confirm
            # against the status class definition
            front_json = OBS.get_front_matter(pages, lang, today)
            back_json = OBS.get_back_matter(pages, lang, today)
            print('Exporting {0}...'.format(lang), end=' ')
            export_unfolding_word(status, unfolding_word_lang_dir, cur_json, lang, github_org, front_json, back_json)

            # replace any existing catalog entry for this language
            if lang in uw_cat_langs:
                uw_catalog.pop(uw_cat_langs.index(lang))
                uw_cat_langs.pop(uw_cat_langs.index(lang))
            uw_catalog.append(lang_cat)

            uw_cat_json = json.dumps(uw_catalog, sort_keys=True, cls=OBSEncoder)
            write_file(uw_cat_path, uw_cat_json)

            # update uw_admin status page
            ObsPublishedLangs.update_page(ObsPublishedLangs.cat_url, ObsPublishedLangs.uw_stat_page)

            print('finished.')
        else:
            print_error('The `checking_level` is invalid.')
            sys.exit(1)
    else:
        print_error('The status is missing `checking_level` or `publish_date`.')
        sys.exit(1)
def check_value_is_valid_string(value_to_check, yaml_data):
    """
    Check that a yaml value exists, is not blank, and is a string.

    :param str value_to_check: the yaml key to check
    :param dict yaml_data: the parsed yaml values
    :return: True if the value is a non-blank string
    """
    if value_to_check not in yaml_data:
        print_error('"' + value_to_check + '" data value for page is missing')
        return False

    if not yaml_data[value_to_check]:
        print_error('"' + value_to_check + '" data value for page is blank')
        return False

    data_value = yaml_data[value_to_check]

    # `unicode` only exists on Python 2; the previous unguarded reference
    # raised NameError on Python 3 whenever a non-str value reached the check
    try:
        string_types = (str, unicode)  # noqa: F821  (Python 2 only)
    except NameError:
        string_types = (str,)

    if not isinstance(data_value, string_types):
        print_error('"' + value_to_check + '" data value for page is not a string')
        return False

    # whitespace-only strings count as blank
    if not data_value.strip():
        print_error('"' + value_to_check + '" data value for page is blank')
        return False

    return True
def get_yaml_data(self, raw_yaml_text): return_val = {} # convert windows line endings cleaned = raw_yaml_text.replace('\r\n', '\n') # replace curly quotes cleaned = cleaned.replace('“', '"').replace('”', '"') # split into individual values, removing empty lines parts = filter(bool, cleaned.split('\n')) # check each value for part in parts: # split into name and value pieces = part.split(':', 1) # must be 2 pieces if len(pieces) != 2: print_error('Bad yaml format => ' + part) return None # try to parse # noinspection PyBroadException try: parsed = yaml.load(part) except: print_error('Not able to parse yaml value => ' + part) return None if not isinstance(parsed, dict): print_error('Yaml parse did not return the expected type => ' + part) return None # add the successfully parsed value to the dictionary for key in parsed.keys(): return_val[key] = parsed[key] if not self.check_yaml_values(return_val): return None return return_val
def run(self):
    """
    Transform an OBS git.door43.org repository into HTML pages.

    Downloads the repository archive into self.temp_dir, unzips it, reads the
    manifest, then renders chapter files 01.md..50.md through template.html
    into self.output_directory. Exceptions are collected in self.errors.
    Progress output is suppressed when self.quiet is set.
    """
    if 'git.door43.org' not in self.source_repo_url:
        print_warning('Currently only git.door43.org repositories are supported.')
        sys.exit(0)

    try:
        # clean up the git repo url
        if self.source_repo_url[-4:] == '.git':
            self.source_repo_url = self.source_repo_url[:-4]

        if self.source_repo_url[-1:] == '/':
            self.source_repo_url = self.source_repo_url[:-1]

        # download the archive
        file_to_download = join_url_parts(self.source_repo_url, 'archive/master.zip')
        repo_dir = self.source_repo_url.rpartition('/')[2]
        downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
        try:
            if not self.quiet:
                print('Downloading {0}...'.format(file_to_download), end=' ')
            if not os.path.isfile(downloaded_file):
                download_file(file_to_download, downloaded_file)
        finally:
            if not self.quiet:
                print('finished.')

        # unzip the archive
        try:
            if not self.quiet:
                # was: 'Unzipping...'.format(downloaded_file) -- a no-op format
                print('Unzipping...', end=' ')
            unzip(downloaded_file, self.temp_dir)
        finally:
            if not self.quiet:
                print('finished.')

        # get the manifest
        try:
            if not self.quiet:
                print('Reading the manifest...', end=' ')
            manifest = load_json_object(os.path.join(self.temp_dir, 'manifest.json'))
        finally:
            if not self.quiet:
                print('finished.')

        # create output directory
        make_dir(self.output_directory)

        # read the markdown files and output html files
        try:
            if not self.quiet:
                print('Processing the OBS markdown files')

            # chapters 01.md through 50.md
            files_to_process = []
            for i in range(1, 51):
                files_to_process.append(str(i).zfill(2) + '.md')

            # template.html lives next to this module
            current_dir = os.path.dirname(inspect.stack()[0][1])
            with codecs.open(os.path.join(current_dir, 'template.html'), 'r', 'utf-8-sig') as html_file:
                html_template = html_file.read()

            for file_to_process in files_to_process:

                # read the markdown file
                file_name = os.path.join(self.temp_dir, repo_dir, 'content', file_to_process)
                with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                    md = md_file.read()

                html = markdown.markdown(md)
                html = TransformOBS.dir_re.sub(r'\1\n' + html + r'\n\2', html_template)
                write_file(os.path.join(self.output_directory, file_to_process.replace('.md', '.html')), html)

        except IOError as ioe:
            print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
            self.errors.append(ioe)

        except Exception as e:
            # e.message was removed in Python 3; str(e) works on 2 and 3
            print_error(str(e))
            self.errors.append(e)

        finally:
            if not self.quiet:
                print('finished.')

    except Exception as e:
        print_error(str(e))
        self.errors.append(e)
def main(git_repo, tag, domain):
    """
    Publish a tagged Bible repository to the API.

    Downloads the repository archive at `tag`, validates and chunks each USFM
    book, writes the USFM output plus status.json to the API output directory,
    then publishes and updates the catalogs.

    :param str git_repo: URL of the git repository
    :param str tag: tag or branch name to download
    :param str domain: domain used to build the output path template
    """
    global download_dir, out_template

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]

    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # e.g. '20240105'
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    books_published = {}
    metadata_obj = None
    usfm_dir = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'meta.json' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(root, 'meta.json'))
            finally:
                print('finished.')

        if 'usfm' in dirs:
            usfm_dir = os.path.join(root, 'usfm')

        # if we have everything, exit the loop
        if usfm_dir and metadata_obj:
            break

    # check for valid repository structure
    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)

    if not usfm_dir:
        print_error('Did not find the usfm directory in {}'.format(git_repo))
        sys.exit(1)

    # get the versification data
    vrs = Bible.get_versification(metadata_obj.versification)  # type: list<Book>
    out_dir = out_template.format(domain, metadata_obj.slug, metadata_obj.lang)

    # walk through the usfm files
    usfm_files = glob(os.path.join(usfm_dir, '*.usfm'))
    errors_found = False
    for usfm_file in usfm_files:

        # read the file
        with codecs.open(usfm_file, 'r', 'utf-8') as in_file:
            book_text = in_file.read()

        # get the book id
        book_search = id_re.search(book_text)
        if not book_search:
            print_error('Book id not found in {}'.format(usfm_file))
            sys.exit(1)

        book_id = book_search.group(1)

        print('Beginning {}...'.format(book_id), end=' ')

        # get book versification info
        book = next((b for b in vrs if b.book_id == book_id), None)
        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # remove \s5 lines
        book_text = s5_re.sub('', book_text)

        # get the usfm for the book
        book.set_usfm(book_text)

        # do basic checks
        book.verify_usfm_tags()
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            errors_found = True

        # get chunks for this book
        Bible.chunk_book(metadata_obj.versification, book)
        book.apply_chunks()

        # produces something like '01-GEN.usfm'
        book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
        print('Writing ' + book_file_name + '...', end=' ')
        write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)

        # book numbers above 39 are New Testament
        meta = ['Bible: OT']
        if book.number > 39:
            meta = ['Bible: NT']
        books_published[book.book_id.lower()] = {'name': book.name,
                                                 'meta': meta,
                                                 'sort': str(book.number).zfill(2),
                                                 'desc': ''
                                                 }
        print('finished.')

    # stop if errors were found
    if errors_found:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)

    print('Writing status.json...', end=' ')
    status = {"slug": '{0}'.format(metadata_obj.slug.lower()),
              "name": metadata_obj.name,
              "lang": metadata_obj.lang,
              "date_modified": today,
              "books_published": books_published,
              "status": {"checking_entity": metadata_obj.checking_entity,
                         "checking_level": metadata_obj.checking_level,
                         "comments": metadata_obj.comments,
                         "contributors": metadata_obj.contributors,
                         "publish_date": today,
                         "source_text": metadata_obj.source_text,
                         "source_text_version": metadata_obj.source_text_version,
                         "version": metadata_obj.version
                         }
              }
    write_file('{0}/status.json'.format(out_dir), status, indent=2)
    print('finished.')

    print()
    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))
def append_error(self, message, prefix='** '):
    """Report a validation error and remember it in self.validation_errors."""
    print_error('{0}{1}'.format(prefix, message))
    self.validation_errors.append(message)
parser.add_argument('-u', '--url', dest='url', default=False, required=False, help='Door43 page where the source can be found.') parser.add_argument('-p', '--nopdf', dest='nopdf', action='store_true', help='Do not produce a PDF.') args = parser.parse_args(sys.argv[1:]) if not args.gitrepo and not args.url: print_error( 'You must provide either --gitrepo or --url to this script.') sys.exit(0) try: # get the language data try: print('Downloading language data...', end=' ') langs = get_languages() finally: print('finished.') this_lang = next(l for l in langs if l['lc'] == args.lang) if not this_lang: print_error('Information for language "{0}" was not found.'.format( args.lang))
def run(self):
    """
    Transform an OBS git repository into HTML pages.

    Creates a temp working directory, downloads and unzips the repository
    archive, reads the manifest, then renders chapter markdown files
    01.md..50.md through template.html into self.output_directory.
    Exceptions are collected in self.errors.
    """
    try:
        self.temp_dir = tempfile.mkdtemp(prefix='txOBS_')

        # clean up the git repo url
        if self.source_repo_url[-4:] == '.git':
            self.source_repo_url = self.source_repo_url[:-4]

        if self.source_repo_url[-1:] == '/':
            self.source_repo_url = self.source_repo_url[:-1]

        # download the archive
        file_to_download = join_url_parts(self.source_repo_url, 'archive/master.zip')
        repo_dir = self.source_repo_url.rpartition('/')[2]
        downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
        try:
            print('Downloading {0}...'.format(file_to_download), end=' ')
            if not os.path.isfile(downloaded_file):
                download_file(file_to_download, downloaded_file)
        finally:
            print('finished.')

        # unzip the archive
        try:
            # was: 'Unzipping...'.format(downloaded_file) -- a no-op format
            print('Unzipping...', end=' ')
            unzip(downloaded_file, self.temp_dir)
        finally:
            print('finished.')

        # get the manifest
        try:
            print('Reading the manifest...', end=' ')
            manifest = load_json_object(os.path.join(self.temp_dir, 'manifest.json'))
        finally:
            print('finished.')

        # create output directory
        make_dir(self.output_directory)

        # read the markdown files and output html files
        try:
            print('Processing the OBS markdown files')

            # chapters 01.md through 50.md
            files_to_process = []
            for i in range(1, 51):
                files_to_process.append(str(i).zfill(2) + '.md')

            # template.html lives next to this module
            current_dir = os.path.dirname(inspect.stack()[0][1])
            with codecs.open(os.path.join(current_dir, 'template.html'), 'r', 'utf-8-sig') as html_file:
                html_template = html_file.read()

            for file_to_process in files_to_process:

                # read the markdown file
                file_name = os.path.join(self.temp_dir, repo_dir, 'content', file_to_process)
                with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                    md = md_file.read()

                html = markdown.markdown(md)
                html = TransformOBS.dir_re.sub(r'\1\n' + html + r'\n\2', html_template)
                write_file(os.path.join(self.output_directory, file_to_process.replace('.md', '.html')), html)

        except IOError as ioe:
            print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
            self.errors.append(ioe)

        except Exception as e:
            # e.message was removed in Python 3; str(e) works on 2 and 3
            print_error(str(e))
            self.errors.append(e)

        finally:
            print('finished.')

    except Exception as e:
        print_error(str(e))
        self.errors.append(e)
def import_obs(lang_data, git_repo, door43_url, no_pdf):
    """
    Import OBS content from a git repository into Dokuwiki pages.

    Downloads obs-<lang>.json and status-<lang>.json, writes one Dokuwiki page
    per chapter, creates the image symlink on api.unfoldingword.org, and
    optionally generates a PDF via the external pdf_export.sh script.

    :param dict lang_data: language record; only the 'lc' code is read here
    :param str git_repo: git repository URL (github or git.door43.org)
    :param str door43_url: alternate Door43 source -- not implemented yet
    :param bool no_pdf: when True, skip PDF generation entirely
    """
    global download_dir, root, api_dir
    lang_code = lang_data['lc']

    # pre-flight checklist
    link_source = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/en'
    if not os.path.isdir(link_source):
        print_error('Image source directory not found: {0}.'.format(link_source))
        sys.exit(1)

    if git_repo[-1:] != '/':
        git_repo += '/'

    if no_pdf:
        tools_dir = None
    else:
        tools_dir = '/var/www/vhosts/door43.org/tools'
        if not os.path.isdir(tools_dir):
            # fall back to a local development checkout
            tools_dir = os.path.expanduser('~/Projects/tools')

        # prompt if tools not found
        if not os.path.isdir(tools_dir):
            tools_dir = None
            print_notice('The tools directory was not found. The PDF cannot be generated.')
            resp = prompt('Do you want to continue without generating a PDF? [Y|n]: ')
            if resp != '' and resp != 'Y' and resp != 'y':
                sys.exit(0)

    if git_repo:
        # strip the trailing slash added above before parsing the repo name
        if git_repo[-1:] == '/':
            git_repo = git_repo[:-1]
        download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
        make_dir(download_dir)

        # make sure OBS is initialized on Dokuwiki
        test_dir = root.format(lang_code, '')
        if not os.path.isdir(test_dir):
            print_warning('It seems OBS has not been initialized on Door43.org for {0}'.format(lang_code))
            sys.exit(1)
    elif door43_url:
        print_error('URL not yet implemented.')
        return
    else:
        print_error('Source not provided.')
        return

    # get the source files from the git repository
    if 'github' in git_repo:
        # https://github.com/unfoldingWord/obs-ru
        # https://raw.githubusercontent.com/unfoldingWord/obs-ru/master/obs-ru.json
        raw_url = git_repo.replace('github.com', 'raw.githubusercontent.com')
    elif 'git.door43.org' in git_repo:
        raw_url = join_url_parts(git_repo, 'raw')
    else:
        # this is to keep IntelliJ happy, is should have been caught in sub main
        return

    # download needed files from the repository
    file_suffix = '-{0}.json'.format(lang_code.lower())
    files_to_download = [
        join_url_parts(raw_url, 'master/obs' + file_suffix),
        join_url_parts(raw_url, 'master/status' + file_suffix)
    ]

    for file_to_download in files_to_download:

        downloaded_file = os.path.join(download_dir, file_to_download.rpartition('/')[2])
        try:
            print('Downloading {0}...'.format(file_to_download), end=' ')
            download_file(file_to_download, downloaded_file)
        finally:
            print('finished.')

    # read the files from the git repository
    file_suffix = '-{0}.json'.format(lang_code.lower())
    obs_obj = None
    status_obj = None
    # front_matter_found = False
    # back_matter_found = False

    try:
        print('Examining the files...', end=' ')
        for root_path, dirs, files in os.walk(download_dir):

            if not len(files):
                continue

            for git_file in files:
                if git_file == 'obs' + file_suffix:
                    obs_obj = OBS(os.path.join(root_path, git_file))
                elif git_file == 'status' + file_suffix:
                    status_obj = OBSStatus(os.path.join(root_path, git_file))
                # elif 'front-matter' in git_file:
                #     front_matter_found = True
                # elif 'back-matter' in git_file:
                #     back_matter_found = True
    finally:
        print('finished.')

    # check data integrity
    # NOTE(review): if obs<suffix> was not found, obs_obj is None and this
    # raises AttributeError instead of printing a helpful message -- verify
    if not obs_obj.verify_all():
        sys.exit(1)

    if not status_obj:
        print_error('The file "status{0}" was not found in the git repository.'.format(file_suffix))
        sys.exit(1)

    # create Dokuwiki pages
    print_ok('Begin: ', 'creating Dokuwiki pages.')
    for chapter in obs_obj.chapters:

        chapter_title = '====== {0} ======'.format(chapter['title'])
        chapter_ref = '//{0}//'.format(chapter['ref'])
        chapter_body = ''
        chapter_num = chapter['number'].zfill(2)

        # each frame becomes an image embed followed by its text
        for frame in chapter['frames']:
            chapter_body += '{{{{{0}?direct&}}}}\n\n{1}\n\n'.format(frame['img'], frame['text'])

        file_name = root.format(lang_code, chapter_num + '.txt')
        print(' Writing {0}'.format(file_name))
        with codecs.open(file_name, 'w', 'utf-8-sig') as out_file:
            out_file.write('{0}\n\n{1}{2}\n'.format(chapter_title, chapter_body, chapter_ref))

    print_ok('Finished: ', 'creating Dokuwiki pages.')

    # Create image symlinks on api.unfoldingword.org
    try:
        print('Creating symlink to images directory...', end=' ')
        link_name = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/{0}'.format(lang_code.lower())
        if not os.path.isfile(link_name) and not os.path.isdir(link_name) and not os.path.islink(link_name):
            os.symlink(link_source, link_name)
    finally:
        print('finished.')

    # Create PDF via ConTeXt
    if not no_pdf and tools_dir and os.path.isdir(tools_dir):
        try:
            print_ok('Beginning: ', 'PDF generation.')
            script_file = os.path.join(tools_dir, 'obs/book/pdf_export.sh')
            out_dir = api_dir.format(lang_code)
            make_dir(out_dir)
            # NOTE(review): shell=True combined with a list argument ignores
            # the extra list items on POSIX -- confirm the script receives
            # its -l/-c/-v/-o options as intended
            process = subprocess.Popen([script_file,
                                        '-l', lang_code,
                                        '-c', status_obj.checking_level,
                                        '-v', status_obj.version,
                                        '-o', out_dir],
                                       shell=True,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)

            # wait for the process to terminate
            out, err = process.communicate()
            exit_code = process.returncode
            out = out.strip().decode('utf-8')
            err = err.strip().decode('utf-8')

            # the error message may be in stdout
            if exit_code != 0:
                if not err:
                    err = out
                    out = None

            if err:
                print_error(err, 2)

            if out:
                print(' ' + out)

            print(' PDF subprocess finished with exit code {0}'.format(exit_code))
        finally:
            print_ok('Finished:', 'generating PDF.')
dest='nopdf', action='store_true', help='Do not produce a PDF.') args = parser.parse_args(sys.argv[1:]) lang = args.lang uw_export = args.uwexport test_export = args.testexport no_pdf = args.nopdf print_ok('STARTING: ', 'importing OBS from Dokuwiki') # pre-flight checklist link_source = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/en' if not os.path.isdir(link_source): print_error( 'Image source directory not found: {0}.'.format(link_source)) sys.exit(1) if no_pdf: tools_dir = None else: tools_dir = '/var/www/vhosts/door43.org/tools' if not os.path.isdir(tools_dir): tools_dir = os.path.expanduser('~/Projects/tools') # prompt if tools not found if not os.path.isdir(tools_dir): tools_dir = None print_notice( 'The tools directory was not found. The PDF cannot be generated.' )
def rechunk_this_one(api_directory):
    """
    Re-chunk the USFM files in one published API directory and re-publish it.

    Reads status.json to pick the versification scheme, removes stale .sig
    files, re-chunks every *.usfm file in place, then publishes to the API
    and updates the catalogs.

    :param str api_directory: directory containing status.json and *.usfm files
    """
    # NOTE(review): the global declaration is unnecessary for read-only
    # access to id_re / s5_re
    global id_re, s5_re

    print_notice('Processing {}'.format(api_directory))

    # read the status.json file
    with codecs.open(os.path.join(api_directory, 'status.json'), 'r', 'utf-8-sig') as in_file:
        status = json.loads(in_file.read())

    # determine versification from the language code
    if status['lang'] == 'ru':
        versification = 'rsc'
    elif status['lang'] == 'hi' or status['lang'] == 'sr-Latn' or status['lang'] == 'hu' or status['lang'] == 'ta':
        versification = 'ufw-odx'
    elif status['lang'] == 'bn':
        versification = 'ufw-bn'
    elif status['lang'] == 'ar':
        versification = 'avd'
    elif status['lang'] == 'kn':
        versification = 'ufw-rev'
    else:
        versification = 'ufw'

    versification_data = Bible.get_versification(versification)  # type: list<Book>

    # remove all .sig files
    for f in os.listdir(api_directory):
        if f.endswith('.sig'):
            os.remove(os.path.join(api_directory, f))

    # rechunk files in this directory
    usfm_files = glob(os.path.join(api_directory, '*.usfm'))
    errors_found = False
    for usfm_file in usfm_files:

        # the license file is not a scripture book
        if usfm_file.endswith('LICENSE.usfm'):
            continue

        # read the file
        with codecs.open(usfm_file, 'r', 'utf-8') as in_file:
            book_text = in_file.read()

        # get the book id
        book_search = id_re.search(book_text)
        if not book_search:
            print_error('Book id not found in {}'.format(usfm_file))
            sys.exit(1)

        book_id = book_search.group(1)

        print('Beginning {}...'.format(book_id), end=' ')

        # get book versification info
        book = next((b for b in versification_data if b.book_id == book_id), None)
        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # remove \s5 lines
        book_text = s5_re.sub('', book_text)

        # get the usfm for the book
        book.set_usfm(book_text)

        # do basic checks
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            errors_found = True

        # get chunks for this book
        Bible.chunk_book(versification, book)
        book.apply_chunks()

        # produces something like '01-GEN.usfm'
        book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
        print('Writing ' + book_file_name + '...', end=' ')

        # overwrite the original file in place
        write_file(usfm_file, book.usfm)
        print('finished.')

    if errors_found:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)

    # rebuild source for tS
    print()
    print('Publishing to the API...')
    with api_publish(api_directory) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
def main(git_repo, tag, domain):
    """Download a Bible book repository at ``tag``, chunk it, and publish it.

    :param git_repo: git repository URL (trailing '.git' or '/' is stripped)
    :param tag: tag or branch whose archive is downloaded
    :param domain: substituted into ``out_template`` to build the output dir
    Exits the process on any structural or USFM validation failure.
    """
    global download_dir, out_template

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]
    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir,
                                           git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    metadata_obj = None
    content_dir = ''
    usfm_file = None

    # download the repository (skipped when a cached zip already exists)
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping...', end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):
        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
                content_dir = root

                # look for the usfm file for the whole book.
                # NOTE: glob() already returns paths joined with content_dir,
                # so use the match directly -- re-joining duplicated the
                # directory prefix when content_dir was a relative path.
                found_usfm = glob(os.path.join(content_dir, '*.usfm'))
                if len(found_usfm) == 1:
                    usfm_file = found_usfm[0]
            finally:
                print('finished.')

        if 'meta.json' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(root, 'meta.json'))
            finally:
                print('finished.')

        # if we have everything, exit the loop
        if manifest and metadata_obj:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)
    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)

    # get the versification data
    print('Getting versification info...', end=' ')
    vrs = Bible.get_versification(metadata_obj.versification)  # type: list<Book>

    # get the book object for this repository
    book = next((b for b in vrs
                 if b.book_id.lower() == manifest['project']['id']),
                None)  # type: Book
    if not book:
        print_error('Book versification data was not found for "{}"'.format(
            manifest['project']['id']))
        sys.exit(1)
    print('finished')

    # a single unified usfm file takes precedence over chunked content
    if usfm_file:
        read_unified_file(book, usfm_file)
    else:
        read_chunked_files(book, content_dir, metadata_obj)

    # do basic checks
    print('Running USFM checks...', end=' ')
    book.verify_chapters_and_verses(True)
    if book.validation_errors:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)
    else:
        print('finished.')

    # insert paragraph markers
    print('Inserting paragraph markers...', end=' ')
    Bible.insert_paragraph_markers(book)
    print('finished.')

    # get chunks for this book
    print('Chunking the text...', end=' ')
    Bible.chunk_book(metadata_obj.versification, book)
    book.apply_chunks()
    print('finished.')

    # save the output
    out_dir = out_template.format(domain, metadata_obj.slug)

    # produces something like '01-GEN.usfm'
    book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2),
                                           book.book_id)
    print('Writing ' + book_file_name + '...', end=' ')
    write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)
    print('finished.')

    # look for an existing status.json file
    print('Updating the status for {0}...'.format(metadata_obj.lang), end=' ')
    status_file = '{0}/status.json'.format(out_dir)
    if os.path.isfile(status_file):
        status = BibleStatus(status_file)
    else:
        status = BibleStatus()

    status.update_from_meta_data(metadata_obj)

    # add this book to the list of "books_published"
    status.add_book_published(book)

    # update the "date_modified"
    status.date_modified = today
    print('finished.')

    # save the status.json file
    print('Writing status.json...', end=' ')
    status_json = json.dumps(status, sort_keys=True, indent=2,
                             cls=BibleEncoder)
    write_file(status_file, status_json)
    print('finished')

    # let the API know it is there
    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))
def import_obs(lang_data, git_repo, door43_url, no_pdf):
    """Import Open Bible Stories for one language from a git repository,
    write Dokuwiki pages for it, link its images, and optionally build a PDF.

    :param lang_data: language record; only the 'lc' (language code) is used
    :param git_repo: git repo URL holding obs-<lc>.json and status-<lc>.json
    :param door43_url: alternate Door43 source page (not implemented yet)
    :param no_pdf: when True, skip the ConTeXt PDF generation entirely
    """
    global download_dir, root, api_dir

    lang_code = lang_data['lc']

    # pre-flight checklist
    link_source = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/en'
    if not os.path.isdir(link_source):
        print_error(
            'Image source directory not found: {0}.'.format(link_source))
        sys.exit(1)

    if git_repo[-1:] != '/':
        git_repo += '/'

    if no_pdf:
        tools_dir = None
    else:
        tools_dir = '/var/www/vhosts/door43.org/tools'
        if not os.path.isdir(tools_dir):
            tools_dir = os.path.expanduser('~/Projects/tools')

        # prompt if tools not found
        if not os.path.isdir(tools_dir):
            tools_dir = None
            print_notice(
                'The tools directory was not found. The PDF cannot be generated.'
            )
            resp = prompt(
                'Do you want to continue without generating a PDF? [Y|n]: ')
            if resp != '' and resp != 'Y' and resp != 'y':
                sys.exit(0)

    if git_repo:
        if git_repo[-1:] == '/':
            git_repo = git_repo[:-1]
        download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
        make_dir(download_dir)

        # make sure OBS is initialized on Dokuwiki
        test_dir = root.format(lang_code, '')
        if not os.path.isdir(test_dir):
            print_warning(
                'It seems OBS has not been initialized on Door43.org for {0}'.
                format(lang_code))
            sys.exit(1)
    elif door43_url:
        print_error('URL not yet implemented.')
        return
    else:
        print_error('Source not provided.')
        return

    # get the source files from the git repository
    if 'github' in git_repo:
        # https://github.com/unfoldingWord/obs-ru
        # https://raw.githubusercontent.com/unfoldingWord/obs-ru/master/obs-ru.json
        raw_url = git_repo.replace('github.com', 'raw.githubusercontent.com')
    elif 'git.door43.org' in git_repo:
        raw_url = join_url_parts(git_repo, 'raw')
    else:
        # this is to keep IntelliJ happy, is should have been caught in sub main
        return

    # download needed files from the repository
    file_suffix = '-{0}.json'.format(lang_code.lower())
    files_to_download = [
        join_url_parts(raw_url, 'master/obs' + file_suffix),
        join_url_parts(raw_url, 'master/status' + file_suffix)
    ]
    for file_to_download in files_to_download:
        downloaded_file = os.path.join(download_dir,
                                       file_to_download.rpartition('/')[2])
        try:
            print('Downloading {0}...'.format(file_to_download), end=' ')
            download_file(file_to_download, downloaded_file)
        finally:
            print('finished.')

    # read the files from the git repository
    obs_obj = None
    status_obj = None
    try:
        print('Examining the files...', end=' ')
        for root_path, dirs, files in os.walk(download_dir):
            if not len(files):
                continue
            for git_file in files:
                if git_file == 'obs' + file_suffix:
                    obs_obj = OBS(os.path.join(root_path, git_file))
                elif git_file == 'status' + file_suffix:
                    status_obj = OBSStatus(os.path.join(root_path, git_file))
    finally:
        print('finished.')

    # check data integrity.
    # Guard first: calling verify_all() on a missing obs file would have
    # raised AttributeError on None instead of reporting the real problem.
    if not obs_obj:
        print_error(
            'The file "obs{0}" was not found in the git repository.'.format(
                file_suffix))
        sys.exit(1)
    if not obs_obj.verify_all():
        sys.exit(1)
    if not status_obj:
        print_error(
            'The file "status{0}" was not found in the git repository.'.format(
                file_suffix))
        sys.exit(1)

    # create Dokuwiki pages
    print_ok('Begin: ', 'creating Dokuwiki pages.')
    for chapter in obs_obj.chapters:
        chapter_title = '====== {0} ======'.format(chapter['title'])
        chapter_ref = '//{0}//'.format(chapter['ref'])
        chapter_body = ''
        chapter_num = chapter['number'].zfill(2)

        for frame in chapter['frames']:
            chapter_body += '{{{{{0}?direct&}}}}\n\n{1}\n\n'.format(
                frame['img'], frame['text'])

        file_name = root.format(lang_code, chapter_num + '.txt')
        print(' Writing {0}'.format(file_name))
        with codecs.open(file_name, 'w', 'utf-8-sig') as out_file:
            out_file.write('{0}\n\n{1}{2}\n'.format(chapter_title,
                                                    chapter_body,
                                                    chapter_ref))
    print_ok('Finished: ', 'creating Dokuwiki pages.')

    # Create image symlinks on api.unfoldingword.org
    try:
        print('Creating symlink to images directory...', end=' ')
        link_name = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/{0}'.format(
            lang_code.lower())
        if not os.path.isfile(link_name) and not os.path.isdir(
                link_name) and not os.path.islink(link_name):
            os.symlink(link_source, link_name)
    finally:
        print('finished.')

    # Create PDF via ConTeXt
    if not no_pdf and tools_dir and os.path.isdir(tools_dir):
        try:
            print_ok('Beginning: ', 'PDF generation.')
            script_file = os.path.join(tools_dir, 'obs/book/pdf_export.sh')
            out_dir = api_dir.format(lang_code)
            make_dir(out_dir)

            # BUG FIX: the list was previously passed with shell=True, which
            # on POSIX hands only script_file to /bin/sh and turns the flags
            # into shell positional parameters -- they never reached
            # pdf_export.sh.  Execute the script directly instead.
            process = subprocess.Popen([
                script_file, '-l', lang_code, '-c', status_obj.checking_level,
                '-v', status_obj.version, '-o', out_dir
            ],
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)

            # wait for the process to terminate
            out, err = process.communicate()
            exit_code = process.returncode
            out = out.strip().decode('utf-8')
            err = err.strip().decode('utf-8')

            # the error message may be in stdout
            if exit_code != 0:
                if not err:
                    err = out
                    out = None
            if err:
                print_error(err, 2)
            if out:
                print(' ' + out)
            print(' PDF subprocess finished with exit code {0}'.format(
                exit_code))
        finally:
            print_ok('Finished:', 'generating PDF.')
def main(git_repo, tag, no_pdf):
    """Publish an OBS repository (manifest-based layout) to the Door43
    exports directory, the API, and the catalog; optionally build a PDF."""
    global download_dir

    # normalize the repository URL: strip a trailing '.git' and/or '/'
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]
    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    repo_name = git_repo.rpartition('/')[2]
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    download_dir = '/tmp/{0}'.format(repo_name)
    make_dir(download_dir)
    zip_path = '{0}/{1}.zip'.format(download_dir, repo_name)
    archive_url = join_url_parts(git_repo, 'archive/{0}.zip'.format(tag))

    manifest = None
    status = None  # type: OBSStatus
    content_dir = None

    # fetch the release archive, re-using a cached copy when present
    try:
        print('Downloading {0}...'.format(archive_url), end=' ')
        if not os.path.isfile(zip_path):
            download_file(archive_url, zip_path)
    finally:
        print('finished.')

    try:
        print('Unzipping...'.format(zip_path), end=' ')
        unzip(zip_path, download_dir)
    finally:
        print('finished.')

    # walk the unzipped tree for the manifest and the content directory
    for walk_root, walk_dirs, walk_files in os.walk(download_dir):
        if 'manifest.json' in walk_files:
            try:
                print('Reading the manifest...', end=' ')
                content_dir = walk_root
                manifest = load_json_object(
                    os.path.join(walk_root, 'manifest.json'))
                status = OBSStatus.from_manifest(manifest)
            finally:
                print('finished.')
        if 'content' in walk_dirs:
            content_dir = os.path.join(walk_root, 'content')
        if content_dir and manifest and status:
            break

    # bail out when the repository layout is not what we expect
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    print('Initializing OBS object...', end=' ')
    lang = manifest['language']['slug']
    obs_obj = OBS()
    obs_obj.date_modified = today
    obs_obj.direction = manifest['language']['dir']
    obs_obj.language = lang
    print('finished')

    # chapters are ordered numerically by their chapter number
    obs_obj.chapters = load_obs_chapters(content_dir)
    obs_obj.chapters.sort(key=lambda c: int(c['number']))

    if not obs_obj.verify_all():
        print_error('Quality check did not pass.')
        sys.exit(1)

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    export_dir = '/var/www/vhosts/door43.org/httpdocs/exports'
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    print('Getting already published languages...', end=' ')
    json_lang_file_path = os.path.join(export_dir, lang, 'obs',
                                       'obs-{0}.json'.format(lang))
    if lang not in lang_dict:
        print("Configuration for language {0} missing.".format(lang))
        sys.exit(1)
    print('finished.')

    updated = update_language_catalog(lang, obs_obj.direction, status, today,
                                      lang_dict, catalog)

    print('Writing the OBS file to the exports directory...', end=' ')
    cur_json = json.dumps(obs_obj, sort_keys=True, cls=OBSEncoder)
    if updated:
        # stamp today's date on this language's existing catalog entry
        entry = [x for x in catalog if x['language'] == lang][0]
        entry['date_modified'] = today
    # noinspection PyTypeChecker
    write_file(json_lang_file_path.replace('.txt', '.json'), cur_json)
    print('finished.')

    export_to_api(lang, status, today, cur_json)

    cat_json = json.dumps(catalog, sort_keys=True, cls=OBSEncoder)
    write_file(cat_path, cat_json)

    # update the catalog
    print_ok('STARTING: ', 'updating the catalogs.')
    update_catalog()
    print_ok('FINISHED: ', 'updating the catalogs.')

    if no_pdf:
        return

    create_pdf(lang, status.checking_level, status.version)
def main(git_repo, tag, no_pdf):
    """Publish an OBS repository (manifest + status.json layout) to the
    Door43 exports directory, the API, and the catalog.

    :param git_repo: git repository URL ('.git' or trailing '/' is stripped)
    :param tag: tag or branch whose archive is downloaded
    :param no_pdf: when True, skip PDF generation
    Exits the process when the repo structure or quality check fails.
    """
    global download_dir

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]
    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir,
                                           git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    status = None  # type: OBSStatus
    content_dir = None

    # download the repository (skipped when a cached zip already exists)
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping...', end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):
        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                content_dir = root
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
            finally:
                print('finished.')

        if 'status.json' in files:
            # read the meta data
            try:
                print('Reading the status...', end=' ')
                content_dir = root
                status = OBSStatus(os.path.join(root, 'status.json'))
            finally:
                print('finished.')

        # if we have everything, exit the loop
        if content_dir and manifest and status:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)
    if not status:
        print_error('Did not find status.json in {}'.format(git_repo))
        sys.exit(1)

    print('Initializing OBS object...', end=' ')
    lang = manifest['target_language']['id']
    obs_obj = OBS()
    obs_obj.date_modified = today
    obs_obj.direction = manifest['target_language']['direction']
    obs_obj.language = lang
    print('finished')

    obs_obj.chapters = load_obs_chapters(content_dir)
    # BUG FIX: sort numerically, matching the sibling publisher -- a plain
    # string sort would place chapter '10' before '2' when numbers are not
    # zero-padded
    obs_obj.chapters.sort(key=lambda c: int(c['number']))

    if not obs_obj.verify_all():
        print_error('Quality check did not pass.')
        sys.exit(1)

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    export_dir = '/var/www/vhosts/door43.org/httpdocs/exports'
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    print('Getting already published languages...', end=' ')
    json_lang_file_path = os.path.join(export_dir, lang, 'obs',
                                       'obs-{0}.json'.format(lang))
    if lang not in lang_dict:
        print("Configuration for language {0} missing.".format(lang))
        sys.exit(1)
    print('finished.')

    updated = update_language_catalog(lang, obs_obj.direction, status, today,
                                      lang_dict, catalog)

    print('Writing the OBS file to the exports directory...', end=' ')
    cur_json = json.dumps(obs_obj, sort_keys=True, cls=OBSEncoder)
    if updated:
        ([x for x in catalog if x['language'] == lang][0]['date_modified']) = today
    write_file(json_lang_file_path.replace('.txt', '.json'), cur_json)
    print('finished.')

    export_to_api(lang, status, today, cur_json)

    cat_json = json.dumps(catalog, sort_keys=True, cls=OBSEncoder)
    write_file(cat_path, cat_json)

    # update the catalog
    print_ok('STARTING: ', 'updating the catalogs.')
    update_catalog()
    print_ok('FINISHED: ', 'updating the catalogs.')

    if no_pdf:
        return

    create_pdf(lang, status.checking_level, status.version)
def main(git_repo, tag):
    """Download a translationAcademy repo at ``tag``, assemble the manual,
    and write it to the output directory as ``<manual>_<volume>.json``."""
    global download_dir

    # normalize the repository URL
    if git_repo.endswith('.git'):
        git_repo = git_repo[:-4]
    if git_repo.endswith('/'):
        git_repo = git_repo[:-1]

    repo_name = git_repo.rpartition('/')[2]
    download_dir = '/tmp/{0}'.format(repo_name)
    make_dir(download_dir)
    zip_path = '{0}/{1}.zip'.format(download_dir, repo_name)
    archive_url = join_url_parts(git_repo, 'archive/' + tag + '.zip')

    metadata_obj = None
    content_dir = None
    toc_obj = None

    # fetch the archive, re-using a cached copy when present
    try:
        print('Downloading {0}...'.format(archive_url), end=' ')
        if not os.path.isfile(zip_path):
            download_file(archive_url, zip_path)
    finally:
        print('finished.')

    try:
        print('Unzipping...'.format(zip_path), end=' ')
        unzip(zip_path, download_dir)
    finally:
        print('finished.')

    # locate meta.yaml, toc.yaml and the content directory
    for walk_root, walk_dirs, walk_files in os.walk(download_dir):
        if 'meta.yaml' in walk_files:
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = TAMetaData(os.path.join(walk_root, 'meta.yaml'))
            finally:
                print('finished.')
        if 'toc.yaml' in walk_files:
            try:
                print('Reading the toc...', end=' ')
                toc_obj = TATableOfContents(
                    os.path.join(walk_root, 'toc.yaml'))
            finally:
                print('finished.')
        if 'content' in walk_dirs:
            content_dir = os.path.join(walk_root, 'content')
        if content_dir and metadata_obj and toc_obj:
            break

    # bail out early when the repository layout is not what we expect
    if not metadata_obj:
        print_error('Did not find meta.yaml in {}'.format(git_repo))
        sys.exit(1)
    if not content_dir:
        print_error('Did not find the content directory in {}'.format(git_repo))
        sys.exit(1)
    if not toc_obj:
        print_error('Did not find toc.yaml in {}'.format(git_repo))
        sys.exit(1)

    # check for missing pages
    check_missing_pages(toc_obj, content_dir)

    # assemble and serialize the manual
    print('Generating the manual...', end=' ')
    manual = TAManual(metadata_obj, toc_obj)
    manual.load_pages(content_dir)
    print('finished.')

    file_name = os.path.join(
        get_output_dir(),
        '{0}_{1}.json'.format(manual.meta.manual, manual.meta.volume))
    print('saving to {0} ...'.format(file_name), end=' ')
    content = json.dumps(manual, sort_keys=True, indent=2, cls=TAEncoder)
    write_file(file_name, content)
    print('finished.')
def rechunk_this_one(api_directory):
    """Re-chunk all USFM books under ``api_directory`` and republish them."""
    global id_re, s5_re

    print_notice('Processing {}'.format(api_directory))

    # load status.json to learn which language this directory holds
    status_path = os.path.join(api_directory, 'status.json')
    with codecs.open(status_path, 'r', 'utf-8-sig') as status_in:
        status = json.loads(status_in.read())

    # pick the versification scheme for the language
    lang = status['lang']
    if lang == 'ru':
        versification = 'rsc'
    elif lang in ('hi', 'sr-Latn', 'hu', 'ta'):
        versification = 'ufw-odx'
    elif lang == 'bn':
        versification = 'ufw-bn'
    elif lang == 'ar':
        versification = 'avd'
    elif lang == 'kn':
        versification = 'ufw-rev'
    else:
        versification = 'ufw'
    versification_data = Bible.get_versification(versification)  # type: list<Book>

    # drop stale signature files; publishing regenerates them
    for entry in os.listdir(api_directory):
        if entry.endswith('.sig'):
            os.remove(os.path.join(api_directory, entry))

    # rechunk every USFM file in the directory
    had_errors = False
    for usfm_path in glob(os.path.join(api_directory, '*.usfm')):
        if usfm_path.endswith('LICENSE.usfm'):
            continue

        with codecs.open(usfm_path, 'r', 'utf-8') as usfm_in:
            book_text = usfm_in.read()

        # the \id marker identifies which book this file is
        id_match = id_re.search(book_text)
        if not id_match:
            print_error('Book id not found in {}'.format(usfm_path))
            sys.exit(1)
        book_id = id_match.group(1)

        print('Beginning {}...'.format(book_id), end=' ')

        book = next((b for b in versification_data if b.book_id == book_id),
                    None)
        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # strip existing \s5 chunk markers before re-chunking
        book_text = s5_re.sub('', book_text)
        book.set_usfm(book_text)

        # basic checks; remember failures but keep checking the other books
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            had_errors = True

        Bible.chunk_book(versification, book)
        book.apply_chunks()

        # e.g. '01-GEN.usfm'
        out_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2),
                                         book.book_id)
        print('Writing ' + out_name + '...', end=' ')
        write_file(usfm_path, book.usfm)
        print('finished.')

    if had_errors:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)

    # rebuild source for tS
    print()
    print('Publishing to the API...')
    with api_publish(api_directory) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
# NOTE(review): fragment of a Dokuwiki OBS importer's command-line setup and
# pre-flight section; it is cut off mid-statement at both ends of this chunk
# (the final `if` has no visible body), so only comments are added here.
# The pre-flight mirrors import_obs(): verify the shared image directory
# exists, then locate the tools dir unless --nopdf was given, prompting the
# operator when the tools (and therefore PDF generation) are unavailable.
parser.add_argument('-t', '--testexport', dest="testexport", default=False, action='store_true', help="Test export to unfoldingWord.") parser.add_argument('-p', '--nopdf', dest='nopdf', action='store_true', help='Do not produce a PDF.') args = parser.parse_args(sys.argv[1:]) lang = args.lang uw_export = args.uwexport test_export = args.testexport no_pdf = args.nopdf print_ok('STARTING: ', 'importing OBS from Dokuwiki') # pre-flight checklist link_source = '/var/www/vhosts/api.unfoldingword.org/httpdocs/obs/jpg/1/en' if not os.path.isdir(link_source): print_error('Image source directory not found: {0}.'.format(link_source)) sys.exit(1) if no_pdf: tools_dir = None else: tools_dir = '/var/www/vhosts/door43.org/tools' if not os.path.isdir(tools_dir): tools_dir = os.path.expanduser('~/Projects/tools') # prompt if tools not found if not os.path.isdir(tools_dir): tools_dir = None print_notice('The tools directory was not found. The PDF cannot be generated.') resp = prompt('Do you want to continue without generating a PDF? [Y|n]: ') if resp != '' and resp != 'Y' and resp != 'y':
def main(git_repo, tag):
    """Generate a translationAcademy manual JSON file from a tagged repo."""
    global download_dir

    # strip a trailing '.git' and a trailing slash from the repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]
    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    slug = git_repo.rpartition('/')[2]
    download_dir = '/tmp/{0}'.format(slug)
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, slug)
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')

    metadata_obj = None
    content_dir = None
    toc_obj = None

    # download the repository archive (cached copies are re-used)
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        print('Unzipping...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # walk the extracted tree until meta, toc and content are all found
    for current, sub_dirs, sub_files in os.walk(download_dir):
        if 'meta.yaml' in sub_files:
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = TAMetaData(os.path.join(current, 'meta.yaml'))
            finally:
                print('finished.')
        if 'toc.yaml' in sub_files:
            try:
                print('Reading the toc...', end=' ')
                toc_obj = TATableOfContents(os.path.join(current, 'toc.yaml'))
            finally:
                print('finished.')
        if 'content' in sub_dirs:
            content_dir = os.path.join(current, 'content')
        if content_dir and metadata_obj and toc_obj:
            break

    # validate the repository structure
    if not metadata_obj:
        print_error('Did not find meta.yaml in {}'.format(git_repo))
        sys.exit(1)
    if not content_dir:
        print_error(
            'Did not find the content directory in {}'.format(git_repo))
        sys.exit(1)
    if not toc_obj:
        print_error('Did not find toc.yaml in {}'.format(git_repo))
        sys.exit(1)

    # make sure the TOC and the content directory agree
    check_missing_pages(toc_obj, content_dir)

    # build the manual and serialize it to JSON
    print('Generating the manual...', end=' ')
    manual = TAManual(metadata_obj, toc_obj)
    manual.load_pages(content_dir)
    print('finished.')

    file_name = os.path.join(
        get_output_dir(),
        '{0}_{1}.json'.format(manual.meta.manual, manual.meta.volume))
    print('saving to {0} ...'.format(file_name), end=' ')
    write_file(file_name,
               json.dumps(manual, sort_keys=True, indent=2, cls=TAEncoder))
    print('finished.')
# NOTE(review): script entry point cut off mid-`try` at the end of this
# chunk, so only comments are added here.  It parses --lang/--gitrepo/--url/
# --nopdf, requires one of --gitrepo or --url, downloads the language list,
# and looks up the requested language record before continuing (continuation
# not visible).  Note: `next(...)` without a default raises StopIteration
# when the language is absent, so the `if not this_lang` guard may be
# unreachable -- confirm against the full file.
if __name__ == '__main__': print() parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('-l', '--lang', dest='lang', default=False, required=True, help='Language code of resource.') parser.add_argument('-r', '--gitrepo', dest='gitrepo', default=False, required=False, help='Git repository where the source can be found.') parser.add_argument('-u', '--url', dest='url', default=False, required=False, help='Door43 page where the source can be found.') parser.add_argument('-p', '--nopdf', dest='nopdf', action='store_true', help='Do not produce a PDF.') args = parser.parse_args(sys.argv[1:]) if not args.gitrepo and not args.url: print_error('You must provide either --gitrepo or --url to this script.') sys.exit(0) try: # get the language data try: print('Downloading language data...', end=' ') langs = get_languages() finally: print('finished.') this_lang = next(l for l in langs if l['lc'] == args.lang) if not this_lang: print_error('Information for language "{0}" was not found.'.format(args.lang)) sys.exit(1)