def test_lint_overflow_warnings(self, mock_invoke_markdown_linter):
    # given
    warning = {
        'errorContext': 'dummy error message',
        'lineNumber': 42,
        'ruleDescription': 'dummy rule'
    }
    warnings = []
    warning_count = 202
    for i in range(0, warning_count):
        warnings.append(warning)
    mock_invoke_markdown_linter.return_value = {  # Don't care about markdown linting here, just specific tn linting
        '/tmp/tmp_lint_EYZ5zV/en_tn/2th/front/intro.md': warnings
    }
    expected_warnings = 200  # should be limited
    zip_file = os.path.join(self.resources_dir, 'tn_linter', 'en_tn.zip')
    out_dir = self.unzip_resource(zip_file)

    # remove everything past genesis
    for dir in BOOK_NUMBERS:
        book = '{0}-{1}'.format(BOOK_NUMBERS[dir], dir.upper())
        link = self.get_link_for_book(book)
        book_path = os.path.join(out_dir, 'en_tn', link)
        if os.path.exists(book_path):
            if book > "02":
                file_utils.remove_tree(book_path)

    new_zip = self.create_new_zip(out_dir)
    linter = TnLinter(source_file=new_zip, commit_data=self.commit_data)

    # when
    results = linter.run()

    # then
    self.assertEqual(len(results['warnings']), expected_warnings)

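# Hedged sketch of the mock wiring these tests assume: each test method
# receives `mock_invoke_markdown_linter` via a mock.patch decorator that this
# excerpt does not show. The patch target string below is a placeholder
# assumption, not confirmed from this excerpt -- point it at the real module
# path of invoke_markdown_linter in the repo under test.
import unittest

try:
    from unittest.mock import patch  # Python 3
except ImportError:
    from mock import patch  # Python 2 backport

class MockWiringExample(unittest.TestCase):

    @patch('libraries.linters.markdown_linter.MarkdownLinter.invoke_markdown_linter')  # placeholder target
    def test_example(self, mock_invoke_markdown_linter):
        # With markdown linting stubbed out, only the linter-specific checks
        # (broken links, missing books, the warning cap) drive the counts
        # asserted in the surrounding tests.
        mock_invoke_markdown_linter.return_value = {}
        self.assertEqual(mock_invoke_markdown_linter.return_value, {})
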
def test_lint_broken_links(self, mock_invoke_markdown_linter):
    # given
    mock_invoke_markdown_linter.return_value = {}  # Don't care about markdown linting here, just specific tq linting
    expected_warnings = 66 - 2  # we only leave 2 books
    zip_file = os.path.join(self.resources_dir, 'tq_linter', 'en_tq.zip')
    out_dir = self.unzip_resource(zip_file)

    # remove everything past genesis
    for book in BOOK_NUMBERS:
        book_path = os.path.join(out_dir, 'en_tq', book)
        if os.path.exists(book_path):
            if BOOK_NUMBERS[book] > "02":
                file_utils.remove_tree(book_path)

    # put a verse in exo so that we can test that there is some content there
    file_path = os.path.join(out_dir, 'en_tq/exo/01/05.md')
    file_utils.write_file(file_path, 'dummy')

    # create chapter in lev with no md files so that we can test that there is no content there
    file_path = os.path.join(out_dir, 'en_tq/lev/01/readme.txt')
    file_utils.write_file(file_path, 'dummy')

    new_zip = self.create_new_zip(out_dir)
    linter = TqLinter(source_file=new_zip, commit_data=self.commit_data)

    # when
    linter.run()

    # then
    self.verify_results_warnings_count(expected_warnings, linter)

def process_callback(self):
    if not self.identifier:
        error = 'No identifier found'
        App.logger.error(error)
        raise Exception(error)
    if not self.s3_results_key:
        error = 'No s3_results_key found for identifier = {0}'.format(self.identifier)
        App.logger.error(error)
        raise Exception(error)

    id_parts = self.identifier.split('/')
    self.multipart = len(id_parts) > 3
    if self.multipart:
        part_count, part_id, book = id_parts[1:4]
        App.logger.debug('Multiple project, part {0} of {1}, linted book {2}'.
                         format(part_id, part_count, book))
        s3_master_results_key = '/'.join(self.s3_results_key.split('/')[:-1])
    else:
        App.logger.debug('Single project')
        s3_master_results_key = self.s3_results_key

    build_log = {
        'identifier': self.identifier,
        'success': self.success,
        'multipart_project': self.multipart,
        'log': self.log,
        'warnings': self.warnings,
        'errors': self.errors,
        's3_commit_key': self.s3_results_key
    }

    if not self.success:
        msg = "Linter failed for identifier: " + self.identifier
        build_log['warnings'].append(msg)
        App.logger.error(msg)
    else:
        App.logger.debug("Linter {0} has {1} warnings:\n{2}".format(self.identifier, len(self.warnings),
                                                                    '\n'.join(self.warnings[:5])))

    has_warnings = len(build_log['warnings']) > 0
    if has_warnings:
        msg = "Linter {0} has Warnings!".format(self.identifier)
        build_log['log'].append(msg)
    else:
        msg = "Linter {0} completed with no warnings".format(self.identifier)
        build_log['log'].append(msg)

    ClientLinterCallback.upload_build_log(build_log, 'lint_log.json', self.temp_dir, self.s3_results_key)

    results = ClientLinterCallback.deploy_if_conversion_finished(s3_master_results_key, self.identifier)
    if results:
        self.all_parts_completed = True
        build_log = results

    remove_tree(self.temp_dir)  # cleanup
    App.db_close()
    return build_log

def run(self):
    """
    Call the converters
    """
    success = False
    try:
        if not self.input_zip_file or not os.path.exists(self.input_zip_file):
            # No input zip file yet, so we need to download the archive
            self.download_archive()
        # unzip the input archive
        App.logger.debug("Unzipping {0} to {1}".format(self.input_zip_file, self.files_dir))
        unzip(self.input_zip_file, self.files_dir)
        # convert method called
        App.logger.debug("Converting files...")
        if self.convert():
            App.logger.debug("Was able to convert {0}".format(self.resource))
            # zip the output dir to the output archive
            App.logger.debug("Adding files in {0} to {1}".format(self.output_dir, self.output_zip_file))
            add_contents_to_zip(self.output_zip_file, self.output_dir)
            remove_tree(self.output_dir)
            # upload the output archive either to cdn_bucket or to a file (no cdn_bucket)
            App.logger.debug("Uploading archive to {0}/{1}".format(self.cdn_bucket, self.cdn_file))
            self.upload_archive()
            remove(self.output_zip_file)
            App.logger.debug("Uploaded")
            success = True
        else:
            self.log.error('Resource {0} currently not supported.'.format(self.resource))
    except Exception as e:
        self.log.error('Conversion process ended abnormally: {0}'.format(e.message))
        App.logger.error('{0}: {1}'.format(str(e), traceback.format_exc()))

    results = {
        'identifier': self.identifier,
        'success': success and len(self.log.logs['error']) == 0,
        'info': self.log.logs['info'],
        'warnings': self.log.logs['warning'],
        'errors': self.log.logs['error']
    }

    if self.callback is not None:
        self.callback_results = results
        self.do_callback(self.callback, self.callback_results)

    App.logger.debug(results)
    return results

@staticmethod
def deploy_if_conversion_finished(s3_results_key, identifier):
    """
    Check if all parts are finished, and if so save the merged build_log and update the jobs table.
    :param s3_results_key: format - u/user/repo/commit_id
    :param identifier: either job_id/part_count/part_id/book if multi-part job, or job_id if single job
    :return: the merged build log if all parts completed, otherwise None
    """
    output_dir = tempfile.mkdtemp(suffix="", prefix="client_callback_deploy_")
    build_log = None
    id_parts = identifier.split('/')
    multiple_project = len(id_parts) > 3
    all_parts_completed = True

    if not multiple_project:
        App.logger.debug('Single job: checking if convert and lint have completed.')
        build_log = ClientLinterCallback.merge_build_status_for_part(build_log, s3_results_key, output_dir)
    else:
        App.logger.debug('Multiple parts: Checking if all parts completed.')
        job_id, part_count, part_id, book = id_parts[:4]
        for i in range(0, int(part_count)):
            part_key = "{0}/{1}".format(s3_results_key, i)
            build_log = ClientLinterCallback.merge_build_status_for_part(build_log, part_key, output_dir)
            if build_log is None:
                App.logger.debug('Part {0} not complete'.format(part_key))
                all_parts_completed = False

    if all_parts_completed and build_log is not None:
        # if all parts found, save build log and kick off deploy
        # set overall status
        if len(build_log['errors']):
            build_log['status'] = 'errors'
        elif len(build_log['warnings']):
            build_log['status'] = 'warnings'
        build_log['ended_at'] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
        if multiple_project:
            build_log['multiple'] = True
        ClientLinterCallback.upload_build_log(build_log, "final_build_log.json", output_dir, s3_results_key)
        if not multiple_project:
            ClientLinterCallback.upload_build_log(build_log, "build_log.json", output_dir, s3_results_key)
            ClientLinterCallback.update_project_file(build_log, output_dir)
        App.logger.debug('All parts completed')
    else:
        App.logger.debug('Not all parts completed')
        build_log = None

    file_utils.remove_tree(output_dir)
    return build_log

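# Usage sketch: both the converter and linter callbacks poll this helper after
# uploading their own part's results (mirroring process_callback above); a
# non-None return value means every part has finished and the merged build log
# has been deployed.
#
#     results = ClientLinterCallback.deploy_if_conversion_finished(s3_master_results_key, self.identifier)
#     if results:
#         self.all_parts_completed = True
#         build_log = results
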
def test_lint_broken_links(self, mock_invoke_markdown_linter):
    # given
    mock_invoke_markdown_linter.return_value = {  # Don't care about markdown linting here, just specific tn linting
        '/tmp/tmp_lint_EYZ5zV/en_tn/2th/front/intro.md': [{
            'errorContext': 'dummy error message',
            'lineNumber': 42,
            'ruleDescription': 'dummy rule'
        }]
    }
    expected_warnings = 64 + 1  # 64 missing books + 1 markdown warning
    zip_file = os.path.join(self.resources_dir, 'tn_linter', 'en_tn.zip')
    out_dir = self.unzip_resource(zip_file)

    # remove everything past genesis
    for dir in BOOK_NUMBERS:
        book = '{0}-{1}'.format(BOOK_NUMBERS[dir], dir.upper())
        link = self.get_link_for_book(book)
        book_path = os.path.join(out_dir, 'en_tn', link)
        if os.path.exists(book_path):
            if book > "02":
                file_utils.remove_tree(book_path)

    # put a verse in exo so that we can test that there is some content there
    file_path = os.path.join(out_dir, 'en_tn/exo/01/05.md')
    file_utils.write_file(file_path, 'dummy')

    # create chapter in lev with no md files so that we can test that there is no content there
    file_path = os.path.join(out_dir, 'en_tn/lev/01/readme.txt')
    file_utils.write_file(file_path, 'dummy')

    new_zip = self.create_new_zip(out_dir)
    linter = TnLinter(source_file=new_zip, commit_data=self.commit_data)

    # when
    linter.run()

    # then
    self.verify_results_warnings_count(expected_warnings, linter)

def process_webhook(self):
    # Check that we got commit data
    if not self.commit_data:
        raise Exception('No commit data from DCS was found in the Payload')

    # Check that the user token is valid
    if not App.gogs_user_token:
        raise Exception('DCS user token not given in Payload.')
    user = App.gogs_handler().get_user(App.gogs_user_token)
    if not user:
        raise Exception('Invalid DCS user token given in Payload')

    # Check that the URL to the DCS repo is valid
    if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
        raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

    # Check that the commit is on the repo's default branch, else quit
    try:
        commit_branch = self.commit_data['ref'].split('/')[2]
    except IndexError:
        raise Exception('Could not determine commit branch, exiting.')
    except KeyError:
        raise Exception('This does not appear to be a push, exiting.')
    if commit_branch != self.commit_data['repository']['default_branch']:
        raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

    # Get the commit_id, commit_url
    commit_id = self.commit_data['after']
    commit = None
    for commit in self.commit_data['commits']:
        if commit['id'] == commit_id:
            break
    commit_id = commit_id[:10]  # Only use the short form
    commit_url = commit['url']

    # Gather other details from the commit that we will note for the job(s)
    user_name = self.commit_data['repository']['owner']['username']
    repo_name = self.commit_data['repository']['name']
    compare_url = self.commit_data['compare_url']
    commit_message = commit['message']
    if 'pusher' in self.commit_data:
        pusher = self.commit_data['pusher']
    else:
        pusher = {'username': commit['author']['username']}
    pusher_username = pusher['username']

    # Download and unzip the repo files
    repo_dir = self.get_repo_files(commit_url, repo_name)

    # Get the resource container
    rc = RC(repo_dir, repo_name)

    # Save manifest to manifest table
    manifest_data = {
        'repo_name': repo_name,
        'user_name': user_name,
        'lang_code': rc.resource.language.identifier,
        'resource_id': rc.resource.identifier,
        'resource_type': rc.resource.type,
        'title': rc.resource.title,
        'manifest': json.dumps(rc.as_dict()),
        'last_updated': datetime.utcnow()
    }
    # First see if the manifest already exists in the DB and update it if it does
    tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
    if tx_manifest:
        for key, value in manifest_data.iteritems():
            setattr(tx_manifest, key, value)
        App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
        tx_manifest.update()
    else:
        tx_manifest = TxManifest(**manifest_data)
        App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
        tx_manifest.insert()

    # Preprocess the files
    preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
    results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

    # Zip up the massaged files
    zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
    App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
    add_contents_to_zip(zip_filepath, preprocess_dir)
    App.logger.debug('finished.')

    # Upload the zipped file to the S3 bucket
    file_key = self.upload_zip_file(commit_id, zip_filepath)

    job = TxJob()
    job.job_id = self.get_unique_job_id()
    job.identifier = job.job_id
    job.user_name = user_name
    job.repo_name = repo_name
    job.commit_id = commit_id
    job.manifests_id = tx_manifest.id
    job.created_at = datetime.utcnow()
    job.user = user.username  # Username of the token, not necessarily the repo's owner
    job.input_format = rc.resource.file_ext
    job.resource_type = rc.resource.identifier
    job.source = self.source_url_base + "/" + file_key
    job.cdn_bucket = App.cdn_bucket
    job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
    job.output = 'https://{0}/{1}'.format(App.cdn_bucket, job.cdn_file)
    job.callback = App.api_url + '/client/callback'
    job.output_format = 'html'
    job.links = {
        "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
        "rel": "self",
        "method": "GET"
    }
    job.success = False

    converter = self.get_converter_module(job)
    linter = self.get_linter_module(job)

    if converter:
        job.convert_module = converter.name
        job.started_at = datetime.utcnow()
        job.expires_at = job.started_at + timedelta(days=1)
        job.eta = job.started_at + timedelta(minutes=5)
        job.status = 'started'
        job.message = 'Conversion started...'
        job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
    else:
        job.error_message('No converter was found to convert {0} from {1} to {2}'
                          .format(job.resource_type, job.input_format, job.output_format))
        job.message = 'No converter found'
        job.status = 'failed'

    if linter:
        job.lint_module = linter.name
    else:
        App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

    job.insert()

    # Get the S3 bucket/dir ready
    s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
    self.clear_commit_directory_in_cdn(s3_commit_key)

    # Create a build log
    build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                           pusher_username, repo_name, user_name)
    # Upload an initial build_log
    self.upload_build_log_to_s3(build_log_json, s3_commit_key)

    # Update the project.json file
    self.update_project_json(commit_id, job, repo_name, user_name)

    # Convert and lint
    if converter:
        if not preprocessor.is_multiple_jobs():
            self.send_request_to_converter(job, converter)
            if linter:
                extra_payload = {
                    's3_results_key': s3_commit_key
                }
                self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
        else:
            # -----------------------------
            # multiple book project
            # -----------------------------
            books = preprocessor.get_book_list()
            App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
            book_count = len(books)
            build_log_json['multiple'] = True
            build_log_json['build_logs'] = []
            for i in range(0, len(books)):
                book = books[i]
                App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))

                # Send job request to tx-manager
                if i == 0:
                    book_job = job  # use the original job created above for the first book
                    book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                else:
                    book_job = job.clone()  # copy the original job for this book's job
                    book_job.job_id = self.get_unique_job_id()
                    book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                    book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                    book_job.output = 'https://{0}/{1}'.format(App.cdn_bucket, book_job.cdn_file)
                    book_job.links = {
                        "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                        "rel": "self",
                        "method": "GET"
                    }
                    book_job.insert()

                book_job.source = self.build_multipart_source(file_key, book)
                book_job.update()

                book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url,
                                                       book_job, pusher_username, repo_name, user_name)
                if len(book) > 0:
                    part = str(i)
                    book_build_log['book'] = book
                    book_build_log['part'] = part
                build_log_json['build_logs'].append(book_build_log)
                self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                self.send_request_to_converter(book_job, converter)
                if linter:
                    extra_payload = {
                        'single_file': book,
                        's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                    }
                    self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

    remove_tree(self.base_temp_dir)  # cleanup
    return build_log_json

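# Worked example of the multipart identifier contract built above: the webhook
# creates 'job_id/part_count/part_id/book' and the converter/linter callbacks
# split it back apart (see process_callback below). The helper name and the
# concrete values here are illustrative only.
def _identifier_contract_example():
    identifier = '{0}/{1}/{2}/{3}'.format('1234abcd', 3, 0, '01-GEN')
    id_parts = identifier.split('/')
    assert len(id_parts) > 3  # how the callbacks detect a multipart job
    part_count, part_id, book = id_parts[1:4]
    assert (part_count, part_id, book) == ('3', '0', '01-GEN')
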
def close(self):
    """delete temp files"""
    remove_tree(self.download_dir)
    remove_tree(self.files_dir)
    remove_tree(self.output_dir)
    remove(self.output_zip_file)

def process_callback(self):
    job_id_parts = self.identifier.split('/')
    job_id = job_id_parts[0]
    self.job = TxJob.get(job_id)

    if not self.job:
        error = 'No job found for job_id = {0}, identifier = {1}'.format(job_id, self.identifier)
        App.logger.error(error)
        raise Exception(error)

    if len(job_id_parts) == 4:
        part_count, part_id, book = job_id_parts[1:]
        App.logger.debug('Multiple project, part {0} of {1}, converting book {2}'.
                         format(part_id, part_count, book))
        multiple_project = True
    else:
        App.logger.debug('Single project')
        part_id = None
        multiple_project = False

    self.job.ended_at = datetime.utcnow()
    self.job.success = self.success
    for message in self.log:
        self.job.log_message(message)
    for message in self.warnings:
        self.job.warnings_message(message)
    for message in self.errors:
        self.job.error_message(message)
    if len(self.errors):
        self.job.log_message('{0} function returned with errors.'.format(self.job.convert_module))
    elif len(self.warnings):
        self.job.log_message('{0} function returned with warnings.'.format(self.job.convert_module))
    else:
        self.job.log_message('{0} function returned successfully.'.format(self.job.convert_module))

    if not self.success or len(self.job.errors):
        self.job.success = False
        self.job.status = "failed"
        message = "Conversion failed"
        App.logger.debug("Conversion failed, success: {0}, errors: {1}".format(self.success, self.job.errors))
    elif len(self.job.warnings) > 0:
        self.job.success = True
        self.job.status = "warnings"
        message = "Conversion successful with warnings"
    else:
        self.job.success = True
        self.job.status = "success"
        message = "Conversion successful"

    self.job.message = message
    self.job.log_message(message)
    self.job.log_message('Finished job {0} at {1}'.format(self.job.job_id,
                                                          self.job.ended_at.strftime("%Y-%m-%dT%H:%M:%SZ")))

    s3_commit_key = 'u/{0}/{1}/{2}'.format(self.job.user_name, self.job.repo_name, self.job.commit_id)
    upload_key = s3_commit_key
    if multiple_project:
        upload_key += "/" + part_id

    App.logger.debug('Callback for commit {0}...'.format(s3_commit_key))

    # Download the ZIP file of the converted files
    converted_zip_url = self.job.output
    converted_zip_file = os.path.join(self.temp_dir, converted_zip_url.rpartition('/')[2])
    remove(converted_zip_file)  # make sure old file not present
    download_success = True
    App.logger.debug('Downloading converted zip file from {0}...'.format(converted_zip_url))
    try:
        download_file(converted_zip_url, converted_zip_file)
    except:
        download_success = False  # if multiple project we note fail and move on
        if not multiple_project:
            remove_tree(self.temp_dir)  # cleanup
        if self.job.errors is None:
            self.job.errors = []
        self.job.errors.append("Missing converted file: " + converted_zip_url)
    finally:
        App.logger.debug('download finished, success={0}'.format(str(download_success)))

    self.job.update()

    if download_success:
        # Unzip the archive
        unzip_dir = self.unzip_converted_files(converted_zip_file)

        # Upload all files to the cdn_bucket with the key of <user>/<repo_name>/<commit> of the repo
        self.upload_converted_files(upload_key, unzip_dir)

    if multiple_project:
        # Now download the existing build_log.json file, update it and upload it back to S3 as convert_log
        build_log_json = self.update_convert_log(s3_commit_key, part_id + "/")
        # mark current part as finished
        self.cdn_upload_contents({}, s3_commit_key + '/' + part_id + '/finished')
    else:  # single part conversion
        # Now download the existing build_log.json file, update it and upload it back to S3 as convert_log
        build_log_json = self.update_convert_log(s3_commit_key)
        self.cdn_upload_contents({}, s3_commit_key + '/finished')  # flag finished

    results = ClientLinterCallback.deploy_if_conversion_finished(s3_commit_key, self.identifier)
    if results:
        self.all_parts_completed = True
        build_log_json = results

    remove_tree(self.temp_dir)  # cleanup
    return build_log_json

def convert(self):
    App.logger.debug('Processing the Bible USFM files')

    # find the first directory that has usfm files.
    files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)
    convert_only_list = self.check_for_exclusive_convert()

    current_dir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
        template_html = template_file.read()

    for filename in files:
        if filename.endswith('.usfm'):
            base_name = os.path.basename(filename)
            if convert_only_list and (base_name not in convert_only_list):  # see if this is a file we are to convert
                continue

            msg = 'Converting Bible USFM file: {0}'.format(base_name)
            self.log.info(msg)
            App.logger.debug(msg)

            # Convert the USFM file
            scratch_dir = tempfile.mkdtemp(prefix='scratch_')
            copyfile(filename, os.path.join(scratch_dir, os.path.basename(filename)))
            filebase = os.path.splitext(os.path.basename(filename))[0]
            UsfmTransform.buildSingleHtml(scratch_dir, scratch_dir, filebase)
            html_filename = filebase + ".html"
            with codecs.open(os.path.join(scratch_dir, html_filename), 'r', 'utf-8-sig') as html_file:
                converted_html = html_file.read()

            # Inject the converted body into the page template
            template_soup = BeautifulSoup(template_html, 'html.parser')
            template_soup.head.title.string = self.resource.upper()
            converted_soup = BeautifulSoup(converted_html, 'html.parser')
            content_div = template_soup.find('div', id='content')
            content_div.clear()
            if converted_soup and converted_soup.body:
                content_div.append(converted_soup.body)
                content_div.body.unwrap()
            else:
                content_div.append('<div class="error">ERROR! NOT CONVERTED!</div>')

            output_file = os.path.join(self.output_dir, html_filename)
            write_file(output_file, unicode(template_soup))
            self.log.info('Converted {0} to {1}.'.format(os.path.basename(filename),
                                                         os.path.basename(html_filename)))
            remove_tree(scratch_dir)
        else:
            # Directly copy over files that are not USFM files
            try:
                output_file = os.path.join(self.output_dir, os.path.basename(filename))
                if not os.path.exists(output_file):
                    copyfile(filename, output_file)
            except:
                pass  # ignore files that cannot be copied

    self.log.info('Finished processing Bible USFM files.')
    return True

def tearDown(self):
    """Runs after each test."""
    # delete temp files
    remove_tree(self.out_dir)
    remove(self.out_zip_file)

def tearDown(self):
    """Runs after each test."""
    # delete temp files
    remove_tree(self.temp_dir)

def close(self):
    """delete temp files"""
    remove_tree(self.temp_dir)