def mock_s3_tn_project(self, part):
    """Unzip the converted en_tn project and seed the mock S3 buckets with its files."""
    zip_file = os.path.join(self.resources_dir, 'converted_projects',
                            'en_tn_converted.zip')
    out_dir = os.path.join(self.temp_dir, 'en_tn_converted')
    unzip(zip_file, out_dir)
    src_dir = os.path.join(out_dir, 'en_tn_converted')
    # Record the plain files (not directories) that make up the project.
    self.project_files = []
    for entry in os.listdir(src_dir):
        if os.path.isfile(os.path.join(src_dir, entry)):
            self.project_files.append(entry)
    self.project_key = 'u/door43/en_tn/12345678'
    # Stamp the requested part number into the build log before uploading.
    build_log_path = os.path.join(src_dir, 'build_log.json')
    build_log = file_utils.load_json_object(build_log_path)
    build_log['part'] = part
    file_utils.write_file(build_log_path, build_log)
    AppSettings.cdn_s3_handler().upload_file(
        build_log_path,
        '{0}/{1}/build_log.json'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(
        os.path.join(src_dir, 'index.json'),
        '{0}/{1}/index.json'.format(self.project_key, part))
    # The 'finished' marker is just another copy of the build log.
    AppSettings.cdn_s3_handler().upload_file(
        build_log_path,
        '{0}/{1}/finished'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(
        os.path.join(src_dir, '01-GEN.html'),
        '{0}/{1}/01-GEN.html'.format(self.project_key, part))
    AppSettings.cdn_s3_handler().upload_file(
        os.path.join(src_dir, 'project.json'),
        'u/door43/en_tq/project.json')
    AppSettings.door43_s3_handler().upload_file(
        os.path.join(self.resources_dir, 'templates', 'project-page.html'),
        'templates/project-page.html')
Exemple #2
0
 def test_convert_only_jas(self):
     """Runs the converter and verifies the output."""
     # Extract the eight-book Bible bundle into a scratch input dir.
     zip_file = os.path.join(self.resources_dir, 'eight_bible_books.zip')
     self.in_dir = tempfile.mkdtemp(prefix='udb_in_', dir=self.temp_dir)
     unzip(zip_file, self.in_dir)
     with closing(Usfm2HtmlConverter('Bible', self.in_dir)) as tx:
         results = tx.run()
     # verify the output
     # (removed: unused `source_url` local, debug print, and commented-out dead code)
     self.assertIsNotNone(results)
     self.assertIsInstance(results, dict)
     self.assertTrue(results['success'])
Exemple #3
0
    def test_run(self):
        """Runs the converter and verifies the output."""
        # Extract the English translationNotes TSV bundle into a scratch dir.
        zip_file = os.path.join(self.resources_dir, 'en_tn.tsv.zip')
        self.in_dir = tempfile.mkdtemp(prefix='tn_in_', dir=self.temp_dir)
        unzip(zip_file, self.in_dir)
        converter = Tsv2HtmlConverter('Translation_Notes', self.in_dir)
        with closing(converter) as tx:
            results = tx.run()

        # The converter reports its outcome as a dict with a 'success' flag.
        self.assertTrue(isinstance(results, dict))
        self.assertTrue(results['success'])
Exemple #4
0
 def test_multiple_projects(self):
     """ Populates the ResourceContainer object and verifies the output."""
     # test with the English tA resource, which contains multiple projects
     zip_file = os.path.join(self.resources_dir,
                             'en-ta-multiple-projects.zip')
     self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
     unzip(zip_file, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'en_ta')
     rc = RC(directory=repo_dir)
     rc.as_dict()
     yaml = load_yaml_object(os.path.join(repo_dir, 'manifest.yaml'))
     # Resource metadata should mirror the dublin_core section of the manifest.
     self.assertEqual(rc.resource.identifier,
                      yaml['dublin_core']['identifier'])
     self.assertEqual(rc.resource.type, yaml['dublin_core']['type'])
     self.assertEqual(rc.resource.format, yaml['dublin_core']['format'])
     self.assertEqual(rc.resource.file_ext, 'md')
     self.assertEqual(rc.resource.conformsto,
                      yaml['dublin_core']['conformsto'])
     self.assertEqual(rc.resource.modified, yaml['dublin_core']['modified'])
     self.assertEqual(len(rc.project_ids), 4)
     self.assertEqual(rc.project_count, 4)
     chapters = rc.project('checking').chapters()
     self.assertEqual(len(chapters), 44)
     chunks = rc.project('checking').chunks('level1')
     self.assertEqual(chunks, ['01.md', 'sub-title.md', 'title.md'])
     self.assertTrue('acceptable' in rc.project('checking').config())
     self.assertTrue('title' in rc.project('checking').toc())
     # BUG FIX: assertTrue(x, msg) treats the second argument as a message and
     # always passes for truthy x; an equality assertion was clearly intended.
     self.assertEqual(
         rc.project('checking').toc()['title'], 'Table of Contents')
Exemple #5
0
    def test_ceb_psa_text_ulb_L3(self):
        """ Populates the ResourceContainer object and verifies the output."""
        # Extract the Cebuano Psalms repo (legacy manifest.json layout).
        zip_file = os.path.join(self.resources_dir, 'ceb_psa_text_ulb_L3.zip')
        self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
        unzip(zip_file, self.out_dir)
        repo_dir = os.path.join(self.out_dir, 'ceb_psa_text_ulb_l3')
        rc = RC(directory=repo_dir)
        rc.as_dict()
        json = load_json_object(os.path.join(repo_dir, 'manifest.json'))
        # Resource metadata should be derived from the legacy manifest.
        self.assertEqual(rc.resource.identifier, json['resource']['id'])
        self.assertEqual(rc.resource.type, 'book')
        self.assertEqual(rc.resource.format, 'text/{0}'.format(json['format']))
        self.assertEqual(rc.resource.file_ext, json['format'])
        self.assertEqual(rc.resource.conformsto, 'pre-rc')
        self.assertEqual(rc.resource.modified,
                         datetime.utcnow().strftime('%Y-%m-%d'))
        chapters = rc.projects[0].chapters()
        # Numeric chapter names must come back in ascending order 1, 2, 3, …
        expected = 1
        for chapter in chapters:
            if chapter.isnumeric():
                self.assertEqual(int(chapter), expected)
                expected += 1
        self.assertEqual(len(chapters), 151)
        chunks = rc.projects[0].chunks('01')
        self.assertEqual(len(chunks), 5)
Exemple #6
0
    def test_run(self):
        """Runs the converter and verifies the output."""
        # Extract the English Open Bible Stories bundle into a scratch dir.
        zip_file = os.path.join(self.resources_dir, 'en-obs.zip')
        self.in_dir = tempfile.mkdtemp(prefix='en_obs_in_', dir=self.temp_dir)
        unzip(zip_file, self.in_dir)
        converter = Md2HtmlConverter('Open_Bible_Stories', self.in_dir)
        with closing(converter) as tx:
            results = tx.run()

        # The converter reports its outcome as a dict with a 'success' flag.
        self.assertTrue(isinstance(results, dict))
        self.assertTrue(results['success'])
def main(date_today, tag, version):
    """Download the en-tq repo at *tag*, build per-book questions.json files, update catalogs.

    :param str date_today: date stamp written into each questions.json
    :param str tag: git tag/branch of the en-tq repository to download
    :param str version: version string written into each questions.json
    """
    global download_dir

    repo = 'https://git.door43.org/Door43/en-tq'
    download_dir = tempfile.mkdtemp(prefix='tempTQ_')
    download_url = join_url_parts(repo, 'archive', '{0}.zip'.format(tag))
    downloaded_file = os.path.join(download_dir, 'tQ.zip')

    # download the repository
    try:
        print('Downloading {0}...'.format(download_url), end=' ')
        download_file(download_url, downloaded_file)
    finally:
        print('finished.')

    try:
        # BUG FIX: the format string had no placeholder, so the file name was
        # silently dropped from the message.
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    source_root = os.path.join(download_dir, 'en-tq', 'content')
    books = [x for x in os.listdir(source_root) if os.path.isdir(os.path.join(source_root, x))]

    for book in books:
        print('Processing {}.'.format(book))
        book_dir = os.path.join(source_root, book)
        api_path = os.path.join(api_v2, book, 'en')
        # noinspection PyUnresolvedReferences
        book_questions = []  # type: list[dict]

        for entry in os.listdir(book_dir):
            file_name = os.path.join(book_dir, entry)

            # we are only processing markdown files
            if not os.path.isfile(file_name) or not entry.endswith('.md'):
                continue

            book_questions.append(get_cq(file_name))

        # Check to see if there are published questions in this book
        pub_check = [x['cq'] for x in book_questions if len(x['cq']) > 0]
        if len(pub_check) == 0:
            print('No published questions for {0}'.format(book))
            continue
        book_questions.sort(key=lambda y: y['id'])
        book_questions.append({'date_modified': date_today, 'version': version})
        write_file('{0}/questions.json'.format(api_path), book_questions, indent=2)

    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
Exemple #8
0
 def unzip_converted_files(self, converted_zip_filepath:str) -> str:
     """Extract a converted-output zip into a fresh temp dir and return that dir's path."""
     AppSettings.logger.debug(f"ClientConverterCallback.unzip_converted_files({converted_zip_filepath})…")
     target_dirpath = tempfile.mkdtemp(prefix='unzip_', dir=self.temp_dir)
     try:
         AppSettings.logger.debug(f"Unzipping {converted_zip_filepath} …")
         unzip(converted_zip_filepath, target_dirpath)
     finally:
         # Log completion even when the unzip raises.
         AppSettings.logger.debug("Unzip finished.")
     return target_dirpath
Exemple #9
0
 def test_lint_warnings(self, mock_invoke):
     """The OBS linter should report warnings for the sample en-obs repo."""
     obs_zip_file = os.path.join(self.resources_dir, 'obs_linter',
                                 'en-obs.zip')
     unzip(obs_zip_file, self.temp_dir)
     source_dir = os.path.join(self.temp_dir, 'en-obs')
     # Stub out the invoke call so the linter runs locally.
     mock_invoke.return_value = {}
     linter = ObsLinter(repo_subject='Open_Bible_Stories',
                        source_dir=source_dir)
     linter.run()
     expected_warnings = True
     self.verify_results(expected_warnings, linter)
Exemple #10
0
 def doTransformTw(self, file_name):
     """Run the Md→HTML converter over a translationWords zip; return the converter."""
     zip_path = os.path.join(self.resources_dir, file_name)
     self.in_dir = tempfile.mkdtemp(prefix='udb_in_', dir=self.temp_dir)
     unzip(zip_path, self.in_dir)
     self.return_val = None
     converter = Md2HtmlConverter('Translation_Words', self.in_dir)
     with closing(converter) as tx:
         self.return_val = tx.run()
     return tx
Exemple #11
0
    def doTransformTn(self, file_name, part=None):
        """Run the Md→HTML converter over a translationNotes zip; return the converter.

        The ``part`` argument is accepted for callers that pass it but is not
        used by the current implementation.
        """
        zip_path = os.path.join(self.resources_dir, file_name)
        self.in_dir = tempfile.mkdtemp(prefix='udb_in_', dir=self.temp_dir)
        unzip(zip_path, self.in_dir)
        self.return_val = None
        converter = Md2HtmlConverter('Translation_Notes', self.in_dir)
        with closing(converter) as tx:
            self.return_val = tx.run()
        return tx
Exemple #12
0
def main(date_today, tag, version):
    """Download the en-tw repo at *tag*, build the terms.json list, update catalogs.

    :param str|unicode date_today: date stamp appended to the terms list
    :param str|unicode tag: git tag/branch of the en-tw repository to download
    :param str|unicode version: version string appended to the terms list
    :return:
    """
    global download_dir, tw_aliases

    repo = 'https://git.door43.org/Door43/en-tw'
    download_dir = tempfile.mkdtemp(prefix='tempTW_')
    download_url = join_url_parts(repo, 'archive', '{0}.zip'.format(tag))
    downloaded_file = os.path.join(download_dir, 'tW.zip')

    # download the repository
    try:
        print('Downloading {0}...'.format(download_url), end=' ')
        download_file(download_url, downloaded_file)
    finally:
        print('finished.')

    try:
        # BUG FIX: the format string had no placeholder, so the file name was
        # silently dropped from the message.
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository: collect every term found under content/
    tw_list = []
    for root, dirs, files in os.walk(
            os.path.join(download_dir, 'en-tw', 'content')):
        for f in files:
            file_name = os.path.join(root, f)
            tw = get_tw(file_name)
            if tw:
                tw_list.append(tw)

    # attach aliases, excluding the term itself
    for i in tw_list:  # type: dict
        if i['id'] in tw_aliases:
            i['aliases'] = [x for x in tw_aliases[i['id']] if x != i['term']]

    # longest terms first
    tw_list.sort(key=lambda y: len(y['term']), reverse=True)
    tw_list.append({'date_modified': date_today, 'version': version})
    api_path = os.path.join(api_v2, 'bible', 'en')
    write_file('{0}/terms.json'.format(api_path), tw_list, indent=2)

    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
Exemple #13
0
    def extractFiles(cls, file_name, repo_name):
        """Unzip a test resource and locate its repo dir; return (rc, repo_dir, temp_dir)."""
        file_path = os.path.join(TestTqPreprocessor.resources_dir, file_name)

        # 1) unzip the repo files into a scratch directory
        temp_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
        unzip(file_path, temp_dir)
        repo_dir = os.path.join(temp_dir, repo_name)
        # Fall back to the zip path itself when the expected folder is absent.
        if not os.path.isdir(repo_dir):
            repo_dir = file_path

        # 2) Get the resource container
        rc = RC(repo_dir)
        return rc, repo_dir, temp_dir
Exemple #14
0
    def test_unzip(self):
        """file_utils.unzip should extract a zip member with its contents intact."""
        tmp_dir = tempfile.mkdtemp()
        # Use os.path.join instead of string concatenation for portability.
        zip_file = os.path.join(tmp_dir, "foo.zip")

        _, tmp_file = tempfile.mkstemp()
        with open(tmp_file, "w") as tmpf:
            tmpf.write("hello world")

        with zipfile.ZipFile(zip_file, "w") as zf:
            zf.write(tmp_file, os.path.basename(tmp_file))

        file_utils.unzip(zip_file, tmp_dir)
        with open(os.path.join(tmp_dir, os.path.basename(tmp_file))) as outf:
            self.assertEqual(outf.read(), "hello world")
Exemple #15
0
    def test_unzip(self):
        """file_utils.unzip should extract a zip member with its contents intact."""
        self.tmp_dir = tempfile.mkdtemp(prefix='Door43_test_file_utils_')
        zip_file = os.path.join(self.tmp_dir, 'foo.zip')

        # Create a small text file and zip it up.
        _, self.tmp_file = tempfile.mkstemp(prefix='Door43_test_')
        with open(self.tmp_file, "w") as tmpf:
            tmpf.write("hello world")
        with zipfile.ZipFile(zip_file, "w") as zf:
            zf.write(self.tmp_file, os.path.basename(self.tmp_file))

        # Extract and verify the round trip.
        file_utils.unzip(zip_file, self.tmp_dir)
        extracted = os.path.join(self.tmp_dir,
                                 os.path.basename(self.tmp_file))
        with open(extracted) as outf:
            self.assertEqual(outf.read(), "hello world")
Exemple #16
0
 def test_en_obs_manifest_yaml(self):
     """ Populates the ResourceContainer object and verifies the output."""
     # Extract the English OBS repo that carries a manifest.yaml.
     zip_file = os.path.join(self.resources_dir, 'en-obs-manifest-yaml.zip')
     self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
     unzip(zip_file, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'en_obs')
     rc = RC(directory=repo_dir, repo_name='en_obs')
     as_dict_result = rc.as_dict()
     manifest = load_yaml_object(os.path.join(repo_dir, 'manifest.yaml'))
     # The dict form of the RC should mirror the manifest exactly.
     self.assertDictEqual(manifest, as_dict_result)
     self.assertEqual(len(rc.projects[0].chapters()), 2)
     front_chunks = rc.project().chunks('front')
     self.assertEqual(front_chunks, ['intro.md', 'title.md'])
Exemple #17
0
def download_repo(commit_url, repo_dir):
    """Download the repo archive behind *commit_url* and unzip it into *repo_dir*."""
    repo_zip_url = commit_url.replace('commit', 'archive') + '.zip'
    zip_basename = repo_zip_url.rpartition('/')[2]
    repo_zip_file = os.path.join(tempfile.gettempdir(), zip_basename)
    try:
        print('Downloading {0}...'.format(repo_zip_url))
        # Reuse a previously downloaded archive when present.
        if not os.path.isfile(repo_zip_file):
            download_file(repo_zip_url, repo_zip_file)
    finally:
        print('finished.')

    try:
        print('Unzipping {0}...'.format(repo_zip_file))
        unzip(repo_zip_file, repo_dir)
    finally:
        print('finished.')
def download_source_file(source_url, destination_folder):
    """
    Downloads the specified source file
        and unzips it if necessary.

    :param str source_url: The URL of the file to download
    :param str destination_folder:   The directory where the downloaded file should be unzipped
    :return: None
    """
    AppSettings.logger.debug(
        f"download_source_file( {source_url}, {destination_folder} )")
    # BUG FIX: URL components are always separated by '/', not os.path.sep
    # (which is '\\' on Windows and would leave the whole URL as the filename).
    source_filepath = os.path.join(destination_folder,
                                   source_url.rpartition('/')[2])
    AppSettings.logger.debug(f"source_filepath: {source_filepath}")

    try:
        AppSettings.logger.info(f"Downloading {source_url} …")

        # if the file already exists, remove it, we want a fresh copy
        if os.path.isfile(source_filepath):
            os.remove(source_filepath)

        download_file(source_url, source_filepath)
    finally:
        AppSettings.logger.debug("Downloading finished.")

    if source_url.lower().endswith('.zip'):
        try:
            AppSettings.logger.debug(f"Unzipping {source_filepath} …")
            # TODO: This is unsafe if the zipfile comes from an untrusted source
            unzip(source_filepath, destination_folder)
        finally:
            AppSettings.logger.debug("Unzipping finished.")

        # clean up the downloaded zip file
        if os.path.isfile(source_filepath):
            os.remove(source_filepath)

    str_filelist = str(os.listdir(destination_folder))
    # Truncate very long listings so the debug log stays readable.
    str_filelist_adjusted = str_filelist if len(str_filelist)<1500 \
                            else f'{str_filelist[:1000]} …… {str_filelist[-500:]}'
    AppSettings.logger.debug(
        f"Destination folder '{destination_folder}' now has: {str_filelist_adjusted}"
    )
Exemple #19
0
 def test_bible_no_manifest(self):
     """ Populates the ResourceContainer object and verifies the output."""
     # Extract a ULB bundle that ships without any manifest file.
     zip_file = os.path.join(self.resources_dir, 'bible-no-manifest.zip')
     self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
     unzip(zip_file, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'en_ulb')
     rc = RC(directory=repo_dir)
     rc.as_dict()
     # With no manifest, the RC must fall back to defaults derived from the repo.
     self.assertEqual(rc.resource.identifier, 'en_ulb')  # RJH: was 'ulb'
     self.assertEqual(rc.resource.type, 'bundle')
     self.assertEqual(rc.resource.format, 'text/usfm')
     self.assertEqual(rc.resource.file_ext, 'usfm')
     self.assertEqual(rc.resource.conformsto, 'pre-rc')
     self.assertEqual(rc.resource.modified,
                      datetime.utcnow().strftime('%Y-%m-%d'))
     self.assertEqual(len(rc.project().chapters()), 0)
     self.assertEqual(len(rc.project().usfm_files()), 8)
 def mock_s3_obs_project(self):
     """Unzip the completed en-obs project and seed the mock S3 buckets with its files."""
     zip_file = os.path.join(self.resources_dir, 'converted_projects',
                             'en-obs-complete.zip')
     out_dir = os.path.join(self.temp_dir, 'en-obs-complete')
     unzip(zip_file, out_dir)
     project_dir = os.path.join(out_dir, 'door43', 'en-obs', '12345678')
     # Record the plain files (not directories) that make up the project.
     self.project_files = []
     for entry in os.listdir(project_dir):
         if os.path.isfile(os.path.join(project_dir, entry)):
             self.project_files.append(entry)
     self.project_key = 'u/door43/en-obs/12345678'
     # Mirror every project file into the CDN bucket under the project key.
     for filename in self.project_files:
         AppSettings.cdn_s3_handler().upload_file(
             os.path.join(project_dir, filename),
             '{0}/{1}'.format(self.project_key, filename))
     AppSettings.cdn_s3_handler().upload_file(
         os.path.join(out_dir, 'door43', 'en-obs', 'project.json'),
         'u/door43/en-obs/project.json')
     AppSettings.door43_s3_handler().upload_file(
         os.path.join(self.resources_dir, 'templates', 'project-page.html'),
         'templates/project-page.html')
Exemple #21
0
 def test_bible_from_tx_pre_rc(self):
     """ Populates the ResourceContainer object and verifies the output."""
     # Extract a translationStudio-style (pre-RC) Matthew repo.
     zip_file = os.path.join(self.resources_dir, 'id_mat_text_ulb-ts.zip')
     self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
     unzip(zip_file, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'id_mat_text_ulb-ts')
     rc = RC(directory=repo_dir)
     rc.as_dict()
     manifest = load_json_object(os.path.join(repo_dir, 'manifest.json'))
     # Metadata should be derived from the legacy manifest.json.
     self.assertEqual(rc.resource.identifier, manifest['resource']['id'])
     self.assertEqual(rc.resource.type, 'book')
     self.assertEqual(rc.resource.format, 'text/{0}'.format(manifest['format']))
     self.assertEqual(rc.resource.file_ext, manifest['format'])
     self.assertEqual(rc.resource.conformsto, 'pre-rc')
     self.assertEqual(rc.resource.modified,
                      datetime.utcnow().strftime('%Y-%m-%d'))
     self.assertEqual(len(rc.projects[0].chapters()), 29)
     self.assertEqual(len(rc.projects[0].chunks('01')), 11)
Exemple #22
0
 def test_en_obs_package_json(self):
     """ Populates the ResourceContainer object and verifies the output."""
     # Extract the English OBS repo that carries a package.json manifest.
     zip_file = os.path.join(self.resources_dir, 'en-obs-package-json.zip')
     self.out_dir = tempfile.mkdtemp(prefix='Door43_test_repo_')
     unzip(zip_file, self.out_dir)
     repo_dir = os.path.join(self.out_dir, 'en-obs')
     rc = RC(directory=repo_dir)
     rc.as_dict()
     package_json = load_json_object(os.path.join(repo_dir, 'package.json'))
     # Metadata should be derived from package.json.
     self.assertEqual(rc.resource.identifier,
                      package_json['resource']['slug'])
     self.assertEqual(rc.resource.type, 'book')
     self.assertEqual(rc.resource.format, package_json['content_mime_type'])
     self.assertEqual(rc.resource.file_ext, 'md')
     self.assertEqual(rc.resource.conformsto, 'pre-rc')
     self.assertEqual(rc.resource.issued,
                      package_json['resource']['status']['pub_date'])
     self.assertEqual(len(rc.projects[0].chapters()), 2)
     self.assertEqual(rc.project().chunks('_back'), ['back-matter.md'])
    def mock_s3_bible_project(self,
                              test_file_name,
                              project_key,
                              multi_part=False):
        """Unzip a converted Bible project and mirror its files into the mock S3 buckets.

        :param str test_file_name: zip file name under resources/converted_projects
        :param str project_key: bucket key prefix, e.g. 'u/door43/en_ulb/12345678'
        :param bool multi_part: when True, HTML files get a right-sidebar div
            injected and every file is also copied into the door43 bucket
        """
        converted_proj_dir = os.path.join(self.resources_dir,
                                          'converted_projects')
        test_file_base = test_file_name.split('.zip')[0]
        zip_file = os.path.join(converted_proj_dir, test_file_name)
        out_dir = os.path.join(self.temp_dir, test_file_base)
        unzip(zip_file, out_dir)
        # Trailing separator so the split below strips the full local prefix.
        project_dir = os.path.join(out_dir, test_file_base) + os.path.sep
        self.project_files = file_utils.get_files(out_dir)
        self.project_key = project_key
        for filename in self.project_files:
            # Strip the local directory prefix and normalise to forward slashes.
            sub_path = filename.split(project_dir)[1].replace(
                os.path.sep, '/')  # Make sure it is a bucket path
            AppSettings.cdn_s3_handler().upload_file(
                filename, '{0}/{1}'.format(project_key, sub_path))

            if multi_part:  # copy files from cdn to door43
                base_name = os.path.basename(filename)
                if '.html' in base_name:
                    with open(filename, 'r') as f:
                        soup = BeautifulSoup(f, 'html.parser')

                    # add nav tag
                    new_tag = soup.new_tag('div', id='right-sidebar')
                    soup.body.append(new_tag)
                    html = str(soup)
                    # Rewrite the file with non-ASCII chars as XML entities.
                    file_utils.write_file(
                        filename, html.encode('ascii', 'xmlcharrefreplace'))

                AppSettings.door43_s3_handler().upload_file(
                    filename, '{0}/{1}'.format(project_key, base_name))

        # u, user, repo = project_key
        # Always provide the shared page template in the door43 bucket.
        AppSettings.door43_s3_handler().upload_file(
            os.path.join(self.resources_dir, 'templates', 'project-page.html'),
            'templates/project-page.html')
Exemple #24
0
 def test_matt_complete_with_backslash(self):
     """
     Runs the converter and verifies the output
     """
     zip_file = os.path.join(self.resources_dir, 'kpb_mat_text_udb.zip')
     self.in_dir = tempfile.mkdtemp(prefix='udb_in_', dir=self.temp_dir)
     unzip(zip_file, self.in_dir)
     with closing(Usfm2HtmlConverter('Bible', self.in_dir)) as tx:
         results = tx.run()
     # verify the output
     # (removed: debug print and commented-out dead code)
     self.assertIsNotNone(results)
     self.assertIsInstance(results, dict)
     self.assertTrue(results['success'])
Exemple #25
0
    def verifyTransform(self, tx, missing_chapters=None):
        """Check the converter's zip output: expected chapter files have body
        content and the 'missing' chapters do not, then compare the run's
        success flag and warning/error/info counts against the self.expected_*
        values set by the calling test.

        :param tx: the converter instance that was run (not used directly here)
        :param list missing_chapters: chapter numbers expected to have no body content
        """
        if not missing_chapters:
            missing_chapters = []
        self.assertTrue(os.path.isfile(self.out_zip_file), "There was no output zip file produced.")
        self.assertIsNotNone(self.return_val, "There was no return value.")
        self.out_dir = tempfile.mkdtemp(prefix='tX_test_obs_')
        unzip(self.out_zip_file, self.out_dir)
        remove_file(self.out_zip_file)

        # Partition chapters 01..50 into those that must have content and those
        # that must not.
        files_to_verify = []
        files_missing = []
        for i in range(1, 51):
            file_name = str(i).zfill(2) + '.html'
            if not i in missing_chapters:
                files_to_verify.append(file_name)
            else:
                files_missing.append(file_name)

        for file_to_verify in files_to_verify:
            file_path = os.path.join(self.out_dir, file_to_verify)
            contents = self.getContents(file_path)
            self.assertIsNotNone(contents, 'OBS HTML body contents not found: {0}'.format(os.path.basename(file_path)))

        for file_to_verify in files_missing:
            file_path = os.path.join(self.out_dir, file_to_verify)
            contents = self.getContents(file_path)
            self.assertIsNone(contents, 'OBS HTML body contents present, but should not be: {0}'.format(os.path.basename(file_path)))

        # Compare the run's result counts against the test's expectations,
        # logging each warning/error to aid debugging on failure.
        self.assertEqual(self.return_val['success'], self.expected_success, "Mismatch in for success boolean")
        self.assertEqual(len(self.return_val['info']) == 0, self.expected_info_empty, "Mismatch in expected info empty")
        for warning in self.return_val['warnings']:
            AppSettings.logger.debug("Warning: " + warning)
        for error in self.return_val['errors']:
            AppSettings.logger.debug("Error: " + error)
        self.assertEqual(len(self.return_val['warnings']), self.expected_warnings, "Mismatch in expected warnings")
        self.assertEqual(len(self.return_val['errors']), self.expected_errors, "Mismatch in expected errors")
Exemple #26
0
def main(resource, lang, slug, name, checking, contrib, ver, check_level,
         comments, source):
    """Download a USFM resource zip, check and chunk each book, write the
    status file, and publish the result to the API.

    :param resource: URL of the source zip file
    :param lang: language code
    :param slug: resource slug used in output paths and the status file
    :param name: human-readable resource name
    :param checking: checking entity for the status file
    :param contrib: contributors for the status file
    :param ver: version string (e.g. '3.1')
    :param check_level: checking level for the status file
    :param comments: status comments
    :param source: source text identifier
    """
    global downloaded_file, unzipped_dir, out_template

    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])
    zip_name = resource.rpartition('/')[2]
    downloaded_file = '/tmp/{0}'.format(zip_name)
    # BUG FIX: str.strip('.zip') strips any of the characters '.', 'z', 'i', 'p'
    # from BOTH ends (e.g. 'zip_file.zip' -> '_file'); slice the suffix instead.
    base_name = zip_name[:-4] if zip_name.endswith('.zip') else zip_name
    unzipped_dir = '/tmp/{0}'.format(base_name)
    out_dir = out_template.format(slug, lang)

    if not os.path.isfile(downloaded_file):
        download_file(resource, downloaded_file)

    unzip(downloaded_file, unzipped_dir)

    books_published = {}
    there_were_errors = False

    for root, dirs, files in os.walk(unzipped_dir):

        # only usfm files
        files = [f for f in files if f[-3:].lower() == 'sfm']

        if not len(files):
            continue

        # there are usfm files, which book is this?
        test_dir = root.rpartition('/')[2]
        book = Book.create_book(test_dir)  # type: Book

        if book:
            book_text = ''
            files.sort()

            for usfm_file in files:
                with codecs.open(os.path.join(root, usfm_file), 'r',
                                 'utf-8') as in_file:
                    book_text += in_file.read() + '\n'

            book.set_usfm(book_text)
            book.clean_usfm()

            # do basic checks
            book.verify_usfm_tags()
            book.verify_chapters_and_verses()
            # BUG FIX: previously `if there_were_errors: continue` skipped every
            # book after the FIRST one with errors; only skip the erroneous book.
            if len(book.validation_errors) > 0:
                there_were_errors = True
                continue

            # get chunks for this book
            book.apply_chunks()

            # produces something like '01-GEN.usfm'
            book_file_name = '{0}-{1}.usfm'.format(
                str(book.number).zfill(2), book.book_id)
            print('Writing ' + book_file_name)
            write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)

            # Books numbered above 39 are New Testament.
            meta = ['Bible: OT']
            if book.number > 39:
                meta = ['Bible: NT']
            books_published[book.book_id.lower()] = {
                'name': book.name,
                'meta': meta,
                'sort': str(book.number).zfill(2),
                'desc': ''
            }

    if there_were_errors:
        print_warning('There are errors you need to fix before continuing.')
        exit()

    # Major version only (e.g. '3' from '3.1') for the source text version.
    source_ver = ver
    if '.' in ver:
        source_ver = ver.split('.')[0]
    status = {
        "slug": '{0}-{1}'.format(slug.lower(), lang),
        "name": name,
        "lang": lang,
        "date_modified": today,
        "books_published": books_published,
        "status": {
            "checking_entity": checking,
            "checking_level": check_level,
            "comments": comments,
            "contributors": contrib,
            "publish_date": today,
            "source_text": source,
            "source_text_version": source_ver,
            "version": ver
        }
    }
    write_file('{0}/status.json'.format(out_dir), status)

    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')
def main(git_repo, tag, domain):
    """
    Download a scripture repository, run USFM checks, chunk the book, and
    publish the resulting USFM file plus status.json to the API/catalogs.

    :param str git_repo: URL of the git repository to publish
    :param str tag: tag (or branch) name of the archive to download
    :param str domain: domain component used to build the output path
    """
    global download_dir, out_template

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]

    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # yyyymmdd
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    metadata_obj = None
    content_dir = ''
    usfm_file = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # FIX: the original message was missing its {0} placeholder, so the
        # file name passed to format() was silently discarded
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
                content_dir = root

                # look for the usfm file for the whole book
                found_usfm = glob(os.path.join(content_dir, '*.usfm'))
                if len(found_usfm) == 1:
                    # FIX: glob() already returns paths that include the
                    # directory component of its pattern, so joining with
                    # content_dir again was redundant (it only worked by
                    # accident because the paths here are absolute)
                    usfm_file = found_usfm[0]
            finally:
                print('finished.')

        if 'meta.json' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(root, 'meta.json'))
            finally:
                print('finished.')

        # if we have everything, exit the loop
        if manifest and metadata_obj:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)

    # get the versification data
    print('Getting versification info...', end=' ')
    vrs = Bible.get_versification(metadata_obj.versification)  # type: list<Book>

    # get the book object for this repository
    book = next((b for b in vrs if b.book_id.lower() == manifest['project']['id']), None)  # type: Book
    if not book:
        print_error('Book versification data was not found for "{}"'.format(manifest['project']['id']))
        sys.exit(1)
    print('finished')

    # a single unified usfm file takes precedence over chunked content files
    if usfm_file:
        read_unified_file(book, usfm_file)

    else:
        read_chunked_files(book, content_dir, metadata_obj)

    # do basic checks
    print('Running USFM checks...', end=' ')
    book.verify_chapters_and_verses(True)
    if book.validation_errors:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)
    else:
        print('finished.')

    # insert paragraph markers
    print('Inserting paragraph markers...', end=' ')
    Bible.insert_paragraph_markers(book)
    print('finished.')

    # get chunks for this book
    print('Chunking the text...', end=' ')
    Bible.chunk_book(metadata_obj.versification, book)
    book.apply_chunks()
    print('finished.')

    # save the output
    out_dir = out_template.format(domain, metadata_obj.slug)

    # produces something like '01-GEN.usfm'
    book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
    print('Writing ' + book_file_name + '...', end=' ')
    write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)
    print('finished.')

    # look for an existing status.json file
    print('Updating the status for {0}...'.format(metadata_obj.lang), end=' ')
    status_file = '{0}/status.json'.format(out_dir)
    if os.path.isfile(status_file):
        status = BibleStatus(status_file)
    else:
        status = BibleStatus()

    status.update_from_meta_data(metadata_obj)

    # add this book to the list of "books_published"
    status.add_book_published(book)

    # update the "date_modified"
    status.date_modified = today
    print('finished.')

    # save the status.json file
    print('Writing status.json...', end=' ')
    status_json = json.dumps(status, sort_keys=True, indent=2, cls=BibleEncoder)
    write_file(status_file, status_json)
    print('finished')

    # let the API know it is there
    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))
def main(resource, lang, slug, name, checking, contrib, ver, check_level,
         comments, source):
    """
    Download a zipped set of USFM files, verify each book, then publish the
    cleaned USFM plus a status.json file to the API and catalogs.

    :param str resource: URL of the zip file to download
    :param str lang: language code of the resource
    :param str slug: resource slug
    :param str name: human-readable resource name
    :param str checking: checking entity
    :param str contrib: contributors
    :param str ver: version string (e.g. '3.2')
    :param str check_level: checking level
    :param str comments: status comments
    :param str source: source text identifier
    """
    global downloaded_file, unzipped_dir, out_template

    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # yyyymmdd
    zip_name = resource.rpartition('/')[2]
    downloaded_file = '/tmp/{0}'.format(zip_name)
    # FIX: str.strip('.zip') removes any of the characters '.', 'z', 'i', 'p'
    # from BOTH ends of the string (e.g. 'prize.zip' -> 'r'), mangling some
    # names; remove the extension explicitly instead
    if zip_name.lower().endswith('.zip'):
        base_name = zip_name[:-4]
    else:
        base_name = zip_name
    unzipped_dir = '/tmp/{0}'.format(base_name)
    out_dir = out_template.format(slug, lang)

    if not os.path.isfile(downloaded_file):
        download_file(resource, downloaded_file)

    unzip(downloaded_file, unzipped_dir)

    books_published = {}
    there_were_errors = False

    for root, dirs, files in os.walk(unzipped_dir):

        # only usfm files (matches both '.usfm' and '.sfm')
        files = [f for f in files if f[-3:].lower() == 'sfm']

        if not len(files):
            continue

        # there are usfm files, which book is this?
        test_dir = root.rpartition('/')[2]
        book = Book.create_book(test_dir)  # type: Book

        if book:
            book_text = ''
            files.sort()

            for usfm_file in files:
                with codecs.open(os.path.join(root, usfm_file), 'r', 'utf-8') as in_file:
                    book_text += in_file.read() + '\n'

            book.set_usfm(book_text)
            book.clean_usfm()

            # do basic checks
            book.verify_usfm_tags()
            book.verify_chapters_and_verses()
            if len(book.validation_errors) > 0:
                there_were_errors = True

            # NOTE: once any book has failed validation, output is skipped
            # for every remaining book as well -- the script aborts below
            # anyway, so no partial output is wanted
            if there_were_errors:
                continue

            # get chunks for this book
            book.apply_chunks()

            # produces something like '01-GEN.usfm'
            book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
            print('Writing ' + book_file_name)
            write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)

            # books 1-39 are Old Testament, 40 and up are New Testament
            meta = ['Bible: OT']
            if book.number > 39:
                meta = ['Bible: NT']
            books_published[book.book_id.lower()] = {'name': book.name,
                                                     'meta': meta,
                                                     'sort': str(book.number).zfill(2),
                                                     'desc': ''
                                                     }

    if there_were_errors:
        print_warning('There are errors you need to fix before continuing.')
        exit()

    # the source text version is the major part of this version ('3.2' -> '3')
    source_ver = ver
    if '.' in ver:
        source_ver = ver.split('.')[0]
    status = {"slug": '{0}-{1}'.format(slug.lower(), lang),
              "name": name,
              "lang": lang,
              "date_modified": today,
              "books_published": books_published,
              "status": {"checking_entity": checking,
                         "checking_level": check_level,
                         "comments": comments,
                         "contributors": contrib,
                         "publish_date": today,
                         "source_text": source,
                         "source_text_version": source_ver,
                         "version": ver
                         }
              }
    write_file('{0}/status.json'.format(out_dir), status)

    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print('Check {0} and do a git push'.format(out_dir))
    def run(self):
        """
        Convert an OBS repository: download the source archive, render each
        markdown file to HTML via the shared template, copy non-markdown
        assets through unchanged, run the OBS inspector, and write a
        combined all.html containing every story.

        :raises Exception: if the source archive could not be downloaded
        """
        # download the archive
        file_to_download = self.source_url
        filename = self.source_url.rpartition('/')[2]
        downloaded_file = os.path.join(self.download_dir, filename)
        self.log_message('Downloading {0}...'.format(file_to_download))
        if not os.path.isfile(downloaded_file):
            try:
                download_file(file_to_download, downloaded_file)
            finally:
                # the download helper may fail silently; verify the file landed
                if not os.path.isfile(downloaded_file):
                    raise Exception("Failed to download {0}".format(file_to_download))
                else:
                    self.log_message('Download successful.')

        # unzip the archive
        self.log_message('Unzipping {0}...'.format(downloaded_file))
        unzip(downloaded_file, self.files_dir)
        self.log_message('Unzip successful.')

        # create output directory
        make_dir(self.output_dir)

        # read the markdown files and output html files
        self.log_message('Processing the OBS markdown files')

        files = sorted(glob(os.path.join(self.files_dir, '*')))

        current_dir = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(current_dir, 'obs-template.html')) as template_file:
            html_template = string.Template(template_file.read())

        complete_html = ''
        for filename in files:
            if filename.endswith('.md'):
                # read the markdown file
                with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
                    md = md_file.read()
                html = markdown.markdown(md)
                complete_html += html
                html = html_template.safe_substitute(content=html)
                html_filename = os.path.splitext(os.path.basename(filename))[0] + ".html"
                output_file = os.path.join(self.output_dir, html_filename)
                write_file(output_file, html)
                self.log_message('Converted {0} to {1}.'.format(os.path.basename(filename), os.path.basename(html_filename)))
            else:
                # best-effort copy of non-markdown assets into the output
                # directory, preserving their path relative to files_dir
                try:
                    output_file = os.path.join(self.output_dir, filename[len(self.files_dir)+1:])
                    if not os.path.exists(output_file):
                        if not os.path.exists(os.path.dirname(output_file)):
                            os.makedirs(os.path.dirname(output_file))
                        copyfile(filename, output_file)
                except Exception:
                    # deliberately ignore copy failures -- these assets are
                    # optional and must not abort the conversion
                    pass

        # Do the OBS inspection
        inspector = OBSInspection(self.output_dir)
        try:
            inspector.run()
        except Exception as e:
            # FIX: Exception.message was removed in Python 3; format the
            # exception object itself instead
            self.warning_message('Failed to run OBS inspector: {0}'.format(e))

        for warning in inspector.warnings:
            self.warning_message(warning)
        for error in inspector.errors:
            self.error_message(error)

        complete_html = html_template.safe_substitute(content=complete_html)
        write_file(os.path.join(self.output_dir, 'all.html'), complete_html)

        self.log_message('Made one HTML of all stories in all.html.')
        self.log_message('Finished processing Markdown files.')
 def mock_download_repo(source, target_dir):
     """Pretend to download *source*; actually unzip the bundled OBS fixture into *target_dir*."""
     print('Mock downloading {}'.format(source))
     print('Unzipping to {}...'.format(target_dir), end=' ')
     fixture = os.path.join(TestPipeline.resources_dir, 'en-obs-master.zip')
     unzip(fixture, target_dir)
     print('finished.')
    def run(self):
        """
        Download an OBS git repository, render chapters 01-50 from markdown
        to HTML using template.html, and write the results to the output
        directory. Exceptions are collected in self.errors rather than
        propagated.
        """
        try:
            self.temp_dir = tempfile.mkdtemp(prefix='txOBS_')

            # clean up the git repo url
            if self.source_repo_url[-4:] == '.git':
                self.source_repo_url = self.source_repo_url[:-4]

            if self.source_repo_url[-1:] == '/':
                self.source_repo_url = self.source_repo_url[:-1]

            # download the archive
            file_to_download = join_url_parts(self.source_repo_url, 'archive/master.zip')
            repo_dir = self.source_repo_url.rpartition('/')[2]
            downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
            try:
                print('Downloading {0}...'.format(file_to_download), end=' ')
                if not os.path.isfile(downloaded_file):
                    download_file(file_to_download, downloaded_file)
            finally:
                print('finished.')

            # unzip the archive
            try:
                # FIX: the original message was missing its {0} placeholder,
                # so the file name passed to format() was silently discarded
                print('Unzipping {0}...'.format(downloaded_file), end=' ')
                unzip(downloaded_file, self.temp_dir)
            finally:
                print('finished.')

            # get the manifest (loading it also validates the repo structure;
            # the value itself is not used further here)
            try:
                print('Reading the manifest...', end=' ')
                manifest = load_json_object(os.path.join(self.temp_dir, 'manifest.json'))
            finally:
                print('finished.')

            # create output directory
            make_dir(self.output_directory)

            # read the markdown files and output html files
            try:
                print('Processing the OBS markdown files')
                # OBS always has exactly 50 stories: 01.md .. 50.md
                files_to_process = []
                for i in range(1, 51):
                    files_to_process.append(str(i).zfill(2) + '.md')

                current_dir = os.path.dirname(inspect.stack()[0][1])
                with codecs.open(os.path.join(current_dir, 'template.html'), 'r', 'utf-8-sig') as html_file:
                    html_template = html_file.read()

                for file_to_process in files_to_process:

                    # read the markdown file
                    file_name = os.path.join(self.temp_dir, repo_dir, 'content', file_to_process)
                    with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                        md = md_file.read()

                    # render the markdown and splice it into the template
                    html = markdown.markdown(md)
                    html = TransformOBS.dir_re.sub(r'\1\n' + html + r'\n\2', html_template)
                    write_file(os.path.join(self.output_directory, file_to_process.replace('.md', '.html')), html)

            except IOError as ioe:
                print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
                self.errors.append(ioe)

            except Exception as e:
                # FIX: Exception.message was removed in Python 3; format the
                # exception object itself instead
                print_error(str(e))
                self.errors.append(e)

            finally:
                print('finished.')

        except Exception as e:
            # FIX: Exception.message was removed in Python 3 (see above)
            print_error(str(e))
            self.errors.append(e)
 def unzip_resource(self, zip_name):
     """Extract *zip_name* from the resources dir into a fresh temp dir and return its path."""
     archive_path = os.path.join(self.resources_dir, zip_name)
     extract_dir = tempfile.mkdtemp(dir=self.temp_dir, prefix='linter_test_')
     unzip(archive_path, extract_dir)
     return extract_dir
def main(git_repo, tag, domain):
    """
    Publish every USFM book found in a git repository's 'usfm' directory.

    Downloads the repo archive, validates and chunks each USFM file, writes
    the books plus a status.json to the output directory, then publishes to
    the API and updates the catalogs.

    :param str git_repo: URL of the git repository to publish
    :param str tag: tag (or branch) name of the archive to download
    :param str domain: domain component used to build the output path
    """
    global download_dir, out_template

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]

    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # yyyymmdd
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    books_published = {}
    metadata_obj = None
    usfm_dir = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # FIX: the original message was missing its {0} placeholder, so the
        # file name passed to format() was silently discarded
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'meta.json' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = BibleMetaData(os.path.join(root, 'meta.json'))
            finally:
                print('finished.')

        if 'usfm' in dirs:
            usfm_dir = os.path.join(root, 'usfm')

        # if we have everything, exit the loop
        if usfm_dir and metadata_obj:
            break

    # check for valid repository structure
    if not metadata_obj:
        print_error('Did not find meta.json in {}'.format(git_repo))
        sys.exit(1)

    if not usfm_dir:
        print_error('Did not find the usfm directory in {}'.format(git_repo))
        sys.exit(1)

    # get the versification data
    vrs = Bible.get_versification(metadata_obj.versification)  # type: list<Book>
    out_dir = out_template.format(domain, metadata_obj.slug, metadata_obj.lang)

    # walk through the usfm files
    usfm_files = glob(os.path.join(usfm_dir, '*.usfm'))
    errors_found = False
    for usfm_file in usfm_files:

        # read the file
        with codecs.open(usfm_file, 'r', 'utf-8') as in_file:
            book_text = in_file.read()

        # get the book id
        book_search = id_re.search(book_text)
        if not book_search:
            print_error('Book id not found in {}'.format(usfm_file))
            sys.exit(1)

        book_id = book_search.group(1)

        print('Beginning {}...'.format(book_id), end=' ')

        # get book versification info
        book = next((b for b in vrs if b.book_id == book_id), None)
        if not book:
            print_error('Book versification data was not found for "{}"'.format(book_id))
            sys.exit(1)

        # remove \s5 lines
        book_text = s5_re.sub('', book_text)

        # get the usfm for the book
        book.set_usfm(book_text)

        # do basic checks
        book.verify_usfm_tags()
        book.verify_chapters_and_verses(True)
        if book.validation_errors:
            errors_found = True

        # get chunks for this book
        Bible.chunk_book(metadata_obj.versification, book)
        book.apply_chunks()

        # produces something like '01-GEN.usfm'
        book_file_name = '{0}-{1}.usfm'.format(str(book.number).zfill(2), book.book_id)
        print('Writing ' + book_file_name + '...', end=' ')
        write_file('{0}/{1}'.format(out_dir, book_file_name), book.usfm)

        # books 1-39 are Old Testament, 40 and up are New Testament
        meta = ['Bible: OT']
        if book.number > 39:
            meta = ['Bible: NT']
        books_published[book.book_id.lower()] = {'name': book.name,
                                                 'meta': meta,
                                                 'sort': str(book.number).zfill(2),
                                                 'desc': ''
                                                 }
        print('finished.')

    # stop if errors were found
    if errors_found:
        print_error('These USFM errors must be corrected before publishing can continue.')
        sys.exit(1)

    print('Writing status.json...', end=' ')
    status = {"slug": '{0}'.format(metadata_obj.slug.lower()),
              "name": metadata_obj.name,
              "lang": metadata_obj.lang,
              "date_modified": today,
              "books_published": books_published,
              "status": {"checking_entity": metadata_obj.checking_entity,
                         "checking_level": metadata_obj.checking_level,
                         "comments": metadata_obj.comments,
                         "contributors": metadata_obj.contributors,
                         "publish_date": today,
                         "source_text": metadata_obj.source_text,
                         "source_text_version": metadata_obj.source_text_version,
                         "version": metadata_obj.version
                         }
              }
    write_file('{0}/status.json'.format(out_dir), status, indent=2)
    print('finished.')

    print()
    print('Publishing to the API...')
    with api_publish(out_dir) as api:
        api.run()
    print('Finished publishing to the API.')

    # update the catalog
    print()
    print('Updating the catalogs...', end=' ')
    update_catalog()
    print('finished.')

    print_notice('Check {0} and do a git push'.format(out_dir))
                                        #========================================

                                        try:
                                            print( 'Downloading converted file from: {0} to: {1} ...'.format( convertedZipUrl, convertedZipFile ), end=' ')
                                            download_file( convertedZipUrl, convertedZipFile )
                                        finally:
                                            print( 'finished download.' )

                                      # Unzip the archive
                                        door43Dir = tempfile.mkdtemp( prefix='door43_' )

                                        if True:
                                        #if os.path.exists( convertedZipFile ):
                                            try:
                                                print( 'Unzipping {0}...'.format( convertedZipFile), end=' ' )
                                                unzip( convertedZipFile, door43Dir )
                                            finally:
                                                print( 'finished unzip.' )
                                            usr = '******' + payload['repository']['owner']['username']
                                            s3ProjectKey = os.path.join( usr, repoName, hash )
                                            print( "s3ProjectKey: " + s3ProjectKey )
                                        else:
                                            print( 'Nothing downloaded' )

                                        # Delete existing files in door43.org for this Project Key
                                        s3Resource = boto3.resource( 's3' )
                                        s3Bucket = s3Resource.Bucket( door43Bucket )

                                        for obj in s3Bucket.objects.filter( Prefix=s3ProjectKey ):
                                            s3Resource.Object( s3Bucket.name, obj.key ).delete()
def main(git_repo, tag, no_pdf):
    """
    Publish an OBS (Open Bible Stories) repository.

    Downloads the repo archive, builds an OBS object from its chapters,
    updates the OBS catalog and per-language export, publishes to the API,
    and optionally generates a PDF.

    :param str git_repo: URL of the git repository to publish
    :param str tag: tag (or branch) name of the archive to download
    :param bool no_pdf: when True, skip PDF generation
    """
    global download_dir

    # clean up the git repo url
    if git_repo[-4:] == '.git':
        git_repo = git_repo[:-4]

    if git_repo[-1:] == '/':
        git_repo = git_repo[:-1]

    # initialize some variables
    today = ''.join(str(datetime.date.today()).rsplit('-')[0:3])  # yyyymmdd
    download_dir = '/tmp/{0}'.format(git_repo.rpartition('/')[2])
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, git_repo.rpartition('/')[2])
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    manifest = None
    status = None  # type: OBSStatus
    content_dir = None

    # download the repository
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # FIX: the original message was missing its {0} placeholder, so the
        # file name passed to format() was silently discarded
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # examine the repository
    for root, dirs, files in os.walk(download_dir):

        if 'manifest.json' in files:
            # read the manifest
            try:
                print('Reading the manifest...', end=' ')
                content_dir = root
                manifest = load_json_object(os.path.join(root, 'manifest.json'))
            finally:
                print('finished.')

        if 'status.json' in files:
            # read the meta data
            try:
                print('Reading the status...', end=' ')
                content_dir = root
                status = OBSStatus(os.path.join(root, 'status.json'))
            finally:
                print('finished.')

        # if we have everything, exit the loop
        if content_dir and manifest and status:
            break

    # check for valid repository structure
    if not manifest:
        print_error('Did not find manifest.json in {}'.format(git_repo))
        sys.exit(1)

    if not status:
        print_error('Did not find status.json in {}'.format(git_repo))
        sys.exit(1)

    print('Initializing OBS object...', end=' ')
    lang = manifest['target_language']['id']
    obs_obj = OBS()
    obs_obj.date_modified = today
    obs_obj.direction = manifest['target_language']['direction']
    obs_obj.language = lang
    print('finished')

    obs_obj.chapters = load_obs_chapters(content_dir)
    obs_obj.chapters.sort(key=lambda c: c['number'])

    if not obs_obj.verify_all():
        print_error('Quality check did not pass.')
        sys.exit(1)

    print('Loading languages...', end=' ')
    lang_dict = OBS.load_lang_strings()
    print('finished.')

    print('Loading the catalog...', end=' ')
    export_dir = '/var/www/vhosts/door43.org/httpdocs/exports'
    cat_path = os.path.join(export_dir, 'obs-catalog.json')
    catalog = load_json_object(cat_path, [])
    print('finished')

    print('Getting already published languages...', end=' ')
    json_lang_file_path = os.path.join(export_dir, lang, 'obs', 'obs-{0}.json'.format(lang))

    if lang not in lang_dict:
        print("Configuration for language {0} missing.".format(lang))
        sys.exit(1)
    print('finished.')

    updated = update_language_catalog(lang, obs_obj.direction, status, today, lang_dict, catalog)

    print('Writing the OBS file to the exports directory...', end=' ')
    cur_json = json.dumps(obs_obj, sort_keys=True, cls=OBSEncoder)

    if updated:
        # refresh the modification date on this language's catalog entry
        ([x for x in catalog if x['language'] == lang][0]['date_modified']) = today
        # NOTE: json_lang_file_path already ends in '.json', so this
        # replace() is a no-op kept from an earlier file layout
        write_file(json_lang_file_path.replace('.txt', '.json'), cur_json)
    print('finished.')

    export_to_api(lang, status, today, cur_json)

    cat_json = json.dumps(catalog, sort_keys=True, cls=OBSEncoder)
    write_file(cat_path, cat_json)

    # update the catalog
    print_ok('STARTING: ', 'updating the catalogs.')
    update_catalog()
    print_ok('FINISHED: ', 'updating the catalogs.')

    if no_pdf:
        return

    create_pdf(lang, status.checking_level, status.version)
Example #36
0
def handle(event, context):
    """
    Callback handler invoked by tx-manager after a conversion job finishes.

    Uploads the converted files to the CDN bucket, updates the commit's
    build_log.json, and refreshes the repo's project.json commit history.

    :param dict event: payload from tx-manager; must contain 'data' (the job)
        and a 'vars' dict providing at least 'cdn_bucket'
    :param context: AWS Lambda context object (unused)
    :raises Exception: if required fields are missing from the payload
    """
    # Getting data from payload which is the JSON that was sent from tx-manager
    if 'data' not in event:
        raise Exception('"data" not in payload')
    job = event['data']

    env_vars = {}
    if 'vars' in event and isinstance(event['vars'], dict):
        env_vars = event['vars']

    # Getting the bucket to where we will unzip the converted files for door43.org. It is different from
    # production and testing, thus it is an environment variable the API Gateway gives us
    if 'cdn_bucket' not in env_vars:
        raise Exception('"cdn_bucket" was not in payload')
    cdn_handler = S3Handler(env_vars['cdn_bucket'])

    if 'identifier' not in job or not job['identifier']:
        raise Exception('"identifier" not in payload')

    owner_name, repo_name, commit_id = job['identifier'].split('/')

    s3_commit_key = 'u/{0}/{1}/{2}'.format(
        owner_name, repo_name, commit_id
    )  # The identifier is how to know which username/repo/commit this callback goes to

    # Download the ZIP file of the converted files
    converted_zip_url = job['output']
    converted_zip_file = os.path.join(tempfile.gettempdir(),
                                      converted_zip_url.rpartition('/')[2])
    try:
        print('Downloading converted zip file from {0}...'.format(
            converted_zip_url))
        if not os.path.isfile(converted_zip_file):
            download_file(converted_zip_url, converted_zip_file)
    finally:
        print('finished.')

    # Unzip the archive
    unzip_dir = tempfile.mkdtemp(prefix='unzip_')
    try:
        print('Unzipping {0}...'.format(converted_zip_file))
        unzip(converted_zip_file, unzip_dir)
    finally:
        print('finished.')

    # Upload all files to the cdn_bucket with the key of <user>/<repo_name>/<commit> of the repo
    for root, dirs, files in os.walk(unzip_dir):
        for f in sorted(files):
            path = os.path.join(root, f)
            key = s3_commit_key + path.replace(unzip_dir, '')
            print('Uploading {0} to {1}'.format(f, key))
            cdn_handler.upload_file(path, key)

    # Now download the existing build_log.json file, update it and upload it back to S3
    build_log_json = cdn_handler.get_json(s3_commit_key + '/build_log.json')

    build_log_json['started_at'] = job['started_at']
    build_log_json['ended_at'] = job['ended_at']
    build_log_json['success'] = job['success']
    build_log_json['status'] = job['status']
    build_log_json['message'] = job['message']

    # copy the log/warning/error lists from the job, defaulting to an empty
    # list when the job carries no (or falsy) entries for them
    for key in ('log', 'warnings', 'errors'):
        build_log_json[key] = job.get(key) or []

    build_log_file = os.path.join(tempfile.gettempdir(),
                                  'build_log_finished.json')
    write_file(build_log_file, build_log_json)
    cdn_handler.upload_file(build_log_file, s3_commit_key + '/build_log.json',
                            0)

    # Download the project.json file for this repo (create it if doesn't exist) and update it
    project_json_key = 'u/{0}/{1}/project.json'.format(owner_name, repo_name)
    project_json = cdn_handler.get_json(project_json_key)

    project_json['user'] = owner_name
    project_json['repo'] = repo_name
    project_json['repo_url'] = 'https://git.door43.org/{0}/{1}'.format(
        owner_name, repo_name)

    # record this commit; timestamps default to None when the job lacks them
    commit = {
        'id': commit_id,
        'created_at': job['created_at'],
        'status': job['status'],
        'success': job['success'],
        'started_at': job.get('started_at'),
        'ended_at': job.get('ended_at')
    }

    if 'commits' not in project_json:
        project_json['commits'] = []

    # drop any previous record of this commit, then append the fresh one
    commits = [c for c in project_json['commits'] if c['id'] != commit_id]
    commits.append(commit)
    project_json['commits'] = commits

    project_file = os.path.join(tempfile.gettempdir(), 'project.json')
    write_file(project_file, project_json)
    cdn_handler.upload_file(project_file, project_json_key, 0)

    print('Finished deploying to cdn_bucket. Done.')
Example #37
0
def main(git_repo, tag):
    """Download a translationAcademy repo archive, parse it, and save it as JSON.

    :param git_repo: URL of the git repository; a trailing ``.git`` or ``/``
        is tolerated and stripped.
    :param tag: branch or tag name used to build the ``archive/<tag>.zip`` URL.

    Side effects: creates/uses ``/tmp/<repo>`` (also stored in the module-level
    ``download_dir``), downloads and unzips the archive, and writes
    ``<manual>_<volume>.json`` into ``get_output_dir()``.
    Exits with status 1 if ``meta.yaml``, ``toc.yaml`` or a ``content``
    directory cannot be found in the unzipped tree.
    """
    global download_dir

    # Normalize the repo URL: drop a trailing '.git' and/or '/'.
    if git_repo.endswith('.git'):
        git_repo = git_repo[:-4]
    if git_repo.endswith('/'):
        git_repo = git_repo[:-1]

    # Derive the local working paths from the repository name.
    repo_name = git_repo.rpartition('/')[2]
    download_dir = '/tmp/{0}'.format(repo_name)
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, repo_name)
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    metadata_obj = None
    content_dir = None
    toc_obj = None

    # Download the archive (skipped when a cached copy already exists).
    # NOTE: the finally-print reports 'finished.' even on failure; kept as-is
    # since the exception still propagates.
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # BUG FIX: the original used 'Unzipping...'.format(downloaded_file)
        # with no placeholder, silently dropping the file name from the message.
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # Walk the unzipped tree looking for the metadata, TOC and content dir.
    for root, dirs, files in os.walk(download_dir):

        if 'meta.yaml' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = TAMetaData(os.path.join(root, 'meta.yaml'))
            finally:
                print('finished.')

        if 'toc.yaml' in files:
            # read the table of contents
            try:
                print('Reading the toc...', end=' ')
                toc_obj = TATableOfContents(os.path.join(root, 'toc.yaml'))
            finally:
                print('finished.')

        if 'content' in dirs:
            content_dir = os.path.join(root, 'content')

        # Stop walking as soon as all three pieces have been located.
        if content_dir and metadata_obj and toc_obj:
            break

    # Validate the repository structure; any missing piece is fatal.
    if not metadata_obj:
        print_error('Did not find meta.yaml in {}'.format(git_repo))
        sys.exit(1)

    if not content_dir:
        print_error(
            'Did not find the content directory in {}'.format(git_repo))
        sys.exit(1)

    if not toc_obj:
        print_error('Did not find toc.yaml in {}'.format(git_repo))
        sys.exit(1)

    # Warn about pages referenced by the TOC but absent from content/.
    check_missing_pages(toc_obj, content_dir)

    # Assemble the manual and serialize it to JSON.
    print('Generating the manual...', end=' ')
    manual = TAManual(metadata_obj, toc_obj)
    manual.load_pages(content_dir)
    print('finished.')

    file_name = os.path.join(
        get_output_dir(), '{0}_{1}.json'.format(manual.meta.manual,
                                                manual.meta.volume))
    print('saving to {0} ...'.format(file_name), end=' ')
    content = json.dumps(manual, sort_keys=True, indent=2, cls=TAEncoder)
    write_file(file_name, content)
    print('finished.')
def main(git_repo, tag):
    """Download a translationAcademy repo archive, parse it, and save it as JSON.

    :param git_repo: URL of the git repository; a trailing ``.git`` or ``/``
        is tolerated and stripped.
    :param tag: branch or tag name used to build the ``archive/<tag>.zip`` URL.

    Side effects: creates/uses ``/tmp/<repo>`` (also stored in the module-level
    ``download_dir``), downloads and unzips the archive, and writes
    ``<manual>_<volume>.json`` into ``get_output_dir()``.
    Exits with status 1 if ``meta.yaml``, ``toc.yaml`` or a ``content``
    directory cannot be found in the unzipped tree.
    """
    global download_dir

    # Normalize the repo URL: drop a trailing '.git' and/or '/'.
    if git_repo.endswith('.git'):
        git_repo = git_repo[:-4]
    if git_repo.endswith('/'):
        git_repo = git_repo[:-1]

    # Derive the local working paths from the repository name.
    repo_name = git_repo.rpartition('/')[2]
    download_dir = '/tmp/{0}'.format(repo_name)
    make_dir(download_dir)
    downloaded_file = '{0}/{1}.zip'.format(download_dir, repo_name)
    file_to_download = join_url_parts(git_repo, 'archive/' + tag + '.zip')
    metadata_obj = None
    content_dir = None
    toc_obj = None

    # Download the archive (skipped when a cached copy already exists).
    # NOTE: the finally-print reports 'finished.' even on failure; kept as-is
    # since the exception still propagates.
    try:
        print('Downloading {0}...'.format(file_to_download), end=' ')
        if not os.path.isfile(downloaded_file):
            download_file(file_to_download, downloaded_file)
    finally:
        print('finished.')

    try:
        # BUG FIX: the original used 'Unzipping...'.format(downloaded_file)
        # with no placeholder, silently dropping the file name from the message.
        print('Unzipping {0}...'.format(downloaded_file), end=' ')
        unzip(downloaded_file, download_dir)
    finally:
        print('finished.')

    # Walk the unzipped tree looking for the metadata, TOC and content dir.
    for root, dirs, files in os.walk(download_dir):

        if 'meta.yaml' in files:
            # read the metadata
            try:
                print('Reading the metadata...', end=' ')
                metadata_obj = TAMetaData(os.path.join(root, 'meta.yaml'))
            finally:
                print('finished.')

        if 'toc.yaml' in files:
            # read the table of contents
            try:
                print('Reading the toc...', end=' ')
                toc_obj = TATableOfContents(os.path.join(root, 'toc.yaml'))
            finally:
                print('finished.')

        if 'content' in dirs:
            content_dir = os.path.join(root, 'content')

        # Stop walking as soon as all three pieces have been located.
        if content_dir and metadata_obj and toc_obj:
            break

    # Validate the repository structure; any missing piece is fatal.
    if not metadata_obj:
        print_error('Did not find meta.yaml in {}'.format(git_repo))
        sys.exit(1)

    if not content_dir:
        print_error('Did not find the content directory in {}'.format(git_repo))
        sys.exit(1)

    if not toc_obj:
        print_error('Did not find toc.yaml in {}'.format(git_repo))
        sys.exit(1)

    # Warn about pages referenced by the TOC but absent from content/.
    check_missing_pages(toc_obj, content_dir)

    # Assemble the manual and serialize it to JSON.
    print('Generating the manual...', end=' ')
    manual = TAManual(metadata_obj, toc_obj)
    manual.load_pages(content_dir)
    print('finished.')

    file_name = os.path.join(get_output_dir(), '{0}_{1}.json'.format(manual.meta.manual, manual.meta.volume))
    print('saving to {0} ...'.format(file_name), end=' ')
    content = json.dumps(manual, sort_keys=True, indent=2, cls=TAEncoder)
    write_file(file_name, content)
    print('finished.')