def test_lint_overflow_warnings(self, mock_invoke_markdown_linter):
    # given
    warning = {
        'errorContext': 'dummy error message',
        'lineNumber': 42,
        'ruleDescription': 'dummy rule'
    }
    warnings = []
    warning_count = 202
    for i in range(0, warning_count):
        warnings.append(warning)
    mock_invoke_markdown_linter.return_value = {  # Don't care about markdown linting here, just specific tn linting
        '/tmp/tmp_lint_EYZ5zV/en_tn/2th/front/intro.md': warnings
    }
    expected_warnings = 200  # should be limited
    zip_file = os.path.join(self.resources_dir, 'tn_linter', 'en_tn.zip')
    out_dir = self.unzip_resource(zip_file)

    # remove everything past genesis
    for dir in BOOK_NUMBERS:
        book = '{0}-{1}'.format(BOOK_NUMBERS[dir], dir.upper())
        link = self.get_link_for_book(book)
        book_path = os.path.join(out_dir, 'en_tn', link)
        if os.path.exists(book_path):
            if book > "02":
                file_utils.remove_tree(book_path)

    new_zip = self.create_new_zip(out_dir)
    linter = TnLinter(source_file=new_zip, commit_data=self.commit_data)

    # when
    results = linter.run()

    # then
    self.assertEqual(len(results['warnings']), expected_warnings)

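# Hedged sketch of the mock wiring these tests assume: each test method
# receives `mock_invoke_markdown_linter` via a mock.patch decorator that this
# excerpt does not show. The patch target string below is a placeholder
# assumption, not confirmed from this excerpt -- point it at the real module
# path of invoke_markdown_linter in the repo under test.
import unittest

try:
    from unittest.mock import patch  # Python 3
except ImportError:
    from mock import patch  # Python 2 backport

class MockWiringExample(unittest.TestCase):

    @patch('libraries.linters.markdown_linter.MarkdownLinter.invoke_markdown_linter')  # placeholder target
    def test_example(self, mock_invoke_markdown_linter):
        # With markdown linting stubbed out, only the linter-specific checks
        # (broken links, missing books, the warning cap) drive the counts
        # asserted in the surrounding tests.
        mock_invoke_markdown_linter.return_value = {}
        self.assertEqual(mock_invoke_markdown_linter.return_value, {})
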
def test_lint_broken_links(self, mock_invoke_markdown_linter):
    # given
    mock_invoke_markdown_linter.return_value = {}  # Don't care about markdown linting here, just specific tq linting
    expected_warnings = 66 - 2  # we only leave 2 books
    zip_file = os.path.join(self.resources_dir, 'tq_linter', 'en_tq.zip')
    out_dir = self.unzip_resource(zip_file)

    # remove everything past genesis
    for book in BOOK_NUMBERS:
        book_path = os.path.join(out_dir, 'en_tq', book)
        if os.path.exists(book_path):
            if BOOK_NUMBERS[book] > "02":
                file_utils.remove_tree(book_path)

    # put a verse in exo so that we can test that there is some content there
    file_path = os.path.join(out_dir, 'en_tq/exo/01/05.md')
    file_utils.write_file(file_path, 'dummy')

    # create chapter in lev with no md files so that we can test that there is no content there
    file_path = os.path.join(out_dir, 'en_tq/lev/01/readme.txt')
    file_utils.write_file(file_path, 'dummy')

    new_zip = self.create_new_zip(out_dir)
    linter = TqLinter(source_file=new_zip, commit_data=self.commit_data)

    # when
    linter.run()

    # then
    self.verify_results_warnings_count(expected_warnings, linter)

def process_callback(self):
    if not self.identifier:
        error = 'No identifier found'
        App.logger.error(error)
        raise Exception(error)
    if not self.s3_results_key:
        error = 'No s3_results_key found for identifier = {0}'.format(self.identifier)
        App.logger.error(error)
        raise Exception(error)

    id_parts = self.identifier.split('/')
    self.multipart = len(id_parts) > 3
    if self.multipart:
        part_count, part_id, book = id_parts[1:4]
        App.logger.debug('Multiple project, part {0} of {1}, linted book {2}'.
                         format(part_id, part_count, book))
        s3_master_results_key = '/'.join(self.s3_results_key.split('/')[:-1])
    else:
        App.logger.debug('Single project')
        s3_master_results_key = self.s3_results_key

    build_log = {
        'identifier': self.identifier,
        'success': self.success,
        'multipart_project': self.multipart,
        'log': self.log,
        'warnings': self.warnings,
        'errors': self.errors,
        's3_commit_key': self.s3_results_key
    }

    if not self.success:
        msg = "Linter failed for identifier: " + self.identifier
        build_log['warnings'].append(msg)
        App.logger.error(msg)
    else:
        App.logger.debug("Linter {0} has {1} warnings:\n{2}".format(self.identifier, len(self.warnings),
                                                                    '\n'.join(self.warnings[:5])))

    has_warnings = len(build_log['warnings']) > 0
    if has_warnings:
        msg = "Linter {0} has Warnings!".format(self.identifier)
        build_log['log'].append(msg)
    else:
        msg = "Linter {0} completed with no warnings".format(self.identifier)
        build_log['log'].append(msg)

    ClientLinterCallback.upload_build_log(build_log, 'lint_log.json', self.temp_dir, self.s3_results_key)

    results = ClientLinterCallback.deploy_if_conversion_finished(s3_master_results_key, self.identifier)
    if results:
        self.all_parts_completed = True
        build_log = results

    remove_tree(self.temp_dir)  # cleanup
    App.db_close()
    return build_log

def run(self):
    """
    Call the converters
    """
    success = False
    try:
        if not self.input_zip_file or not os.path.exists(self.input_zip_file):
            # No input zip file yet, so we need to download the archive
            self.download_archive()
        # unzip the input archive
        App.logger.debug("Unzipping {0} to {1}".format(self.input_zip_file, self.files_dir))
        unzip(self.input_zip_file, self.files_dir)
        # convert method called
        App.logger.debug("Converting files...")
        if self.convert():
            App.logger.debug("Was able to convert {0}".format(self.resource))
            # zip the output dir to the output archive
            App.logger.debug("Adding files in {0} to {1}".format(self.output_dir, self.output_zip_file))
            add_contents_to_zip(self.output_zip_file, self.output_dir)
            remove_tree(self.output_dir)
            # upload the output archive either to cdn_bucket or to a file (no cdn_bucket)
            App.logger.debug("Uploading archive to {0}/{1}".format(self.cdn_bucket, self.cdn_file))
            self.upload_archive()
            remove(self.output_zip_file)
            App.logger.debug("Uploaded")
            success = True
        else:
            self.log.error('Resource {0} currently not supported.'.format(self.resource))
    except Exception as e:
        self.log.error('Conversion process ended abnormally: {0}'.format(e.message))
        App.logger.error('{0}: {1}'.format(str(e), traceback.format_exc()))

    results = {
        'identifier': self.identifier,
        'success': success and len(self.log.logs['error']) == 0,
        'info': self.log.logs['info'],
        'warnings': self.log.logs['warning'],
        'errors': self.log.logs['error']
    }

    if self.callback is not None:
        self.callback_results = results
        self.do_callback(self.callback, self.callback_results)

    App.logger.debug(results)
    return results

@staticmethod
def deploy_if_conversion_finished(s3_results_key, identifier):
    """
    Check if all parts are finished, and if so save the merged build_log and update the jobs table.
    :param s3_results_key: format - u/user/repo/commit_id
    :param identifier: either job_id/part_count/part_id/book if multi-part job, or job_id if single job
    :return: the merged build log if all parts completed, otherwise None
    """
    output_dir = tempfile.mkdtemp(suffix="", prefix="client_callback_deploy_")
    build_log = None
    id_parts = identifier.split('/')
    multiple_project = len(id_parts) > 3
    all_parts_completed = True

    if not multiple_project:
        App.logger.debug('Single job: checking if convert and lint have completed.')
        build_log = ClientLinterCallback.merge_build_status_for_part(build_log, s3_results_key, output_dir)
    else:
        App.logger.debug('Multiple parts: Checking if all parts completed.')
        job_id, part_count, part_id, book = id_parts[:4]
        for i in range(0, int(part_count)):
            part_key = "{0}/{1}".format(s3_results_key, i)
            build_log = ClientLinterCallback.merge_build_status_for_part(build_log, part_key, output_dir)
            if build_log is None:
                App.logger.debug('Part {0} not complete'.format(part_key))
                all_parts_completed = False

    if all_parts_completed and build_log is not None:
        # if all parts found, save build log and kick off deploy
        # set overall status
        if len(build_log['errors']):
            build_log['status'] = 'errors'
        elif len(build_log['warnings']):
            build_log['status'] = 'warnings'
        build_log['ended_at'] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
        if multiple_project:
            build_log['multiple'] = True
        ClientLinterCallback.upload_build_log(build_log, "final_build_log.json", output_dir, s3_results_key)
        if not multiple_project:
            ClientLinterCallback.upload_build_log(build_log, "build_log.json", output_dir, s3_results_key)
            ClientLinterCallback.update_project_file(build_log, output_dir)
        App.logger.debug('All parts completed')
    else:
        App.logger.debug('Not all parts completed')
        build_log = None

    file_utils.remove_tree(output_dir)
    return build_log

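# Usage sketch: both the converter and linter callbacks poll this helper after
# uploading their own part's results (mirroring process_callback above); a
# non-None return value means every part has finished and the merged build log
# has been deployed.
#
#     results = ClientLinterCallback.deploy_if_conversion_finished(s3_master_results_key, self.identifier)
#     if results:
#         self.all_parts_completed = True
#         build_log = results
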
def test_lint_broken_links(self, mock_invoke_markdown_linter):
    # given
    mock_invoke_markdown_linter.return_value = {  # Don't care about markdown linting here, just specific tn linting
        '/tmp/tmp_lint_EYZ5zV/en_tn/2th/front/intro.md': [{
            'errorContext': 'dummy error message',
            'lineNumber': 42,
            'ruleDescription': 'dummy rule'
        }]
    }
    expected_warnings = 64 + 1  # 64 missing books + 1 markdown warning
    zip_file = os.path.join(self.resources_dir, 'tn_linter', 'en_tn.zip')
    out_dir = self.unzip_resource(zip_file)

    # remove everything past genesis
    for dir in BOOK_NUMBERS:
        book = '{0}-{1}'.format(BOOK_NUMBERS[dir], dir.upper())
        link = self.get_link_for_book(book)
        book_path = os.path.join(out_dir, 'en_tn', link)
        if os.path.exists(book_path):
            if book > "02":
                file_utils.remove_tree(book_path)

    # put a verse in exo so that we can test that there is some content there
    file_path = os.path.join(out_dir, 'en_tn/exo/01/05.md')
    file_utils.write_file(file_path, 'dummy')

    # create chapter in lev with no md files so that we can test that there is no content there
    file_path = os.path.join(out_dir, 'en_tn/lev/01/readme.txt')
    file_utils.write_file(file_path, 'dummy')

    new_zip = self.create_new_zip(out_dir)
    linter = TnLinter(source_file=new_zip, commit_data=self.commit_data)

    # when
    linter.run()

    # then
    self.verify_results_warnings_count(expected_warnings, linter)

def process_webhook(self):
    # Check that we got commit data
    if not self.commit_data:
        raise Exception('No commit data from DCS was found in the Payload')

    # Check that the user token is valid
    if not App.gogs_user_token:
        raise Exception('DCS user token not given in Payload.')
    user = App.gogs_handler().get_user(App.gogs_user_token)
    if not user:
        raise Exception('Invalid DCS user token given in Payload')

    # Check that the URL to the DCS repo is valid
    if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
        raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

    # Check that the commit is on the repo's default branch, else quit
    try:
        commit_branch = self.commit_data['ref'].split('/')[2]
    except IndexError:
        raise Exception('Could not determine commit branch, exiting.')
    except KeyError:
        raise Exception('This does not appear to be a push, exiting.')
    if commit_branch != self.commit_data['repository']['default_branch']:
        raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

    # Get the commit_id, commit_url
    commit_id = self.commit_data['after']
    commit = None
    for commit in self.commit_data['commits']:
        if commit['id'] == commit_id:
            break
    commit_id = commit_id[:10]  # Only use the short form
    commit_url = commit['url']

    # Gather other details from the commit that we will note for the job(s)
    user_name = self.commit_data['repository']['owner']['username']
    repo_name = self.commit_data['repository']['name']
    compare_url = self.commit_data['compare_url']
    commit_message = commit['message']
    if 'pusher' in self.commit_data:
        pusher = self.commit_data['pusher']
    else:
        pusher = {'username': commit['author']['username']}
    pusher_username = pusher['username']

    # Download and unzip the repo files
    repo_dir = self.get_repo_files(commit_url, repo_name)

    # Get the resource container
    rc = RC(repo_dir, repo_name)

    # Save manifest to manifest table
    manifest_data = {
        'repo_name': repo_name,
        'user_name': user_name,
        'lang_code': rc.resource.language.identifier,
        'resource_id': rc.resource.identifier,
        'resource_type': rc.resource.type,
        'title': rc.resource.title,
        'manifest': json.dumps(rc.as_dict()),
        'last_updated': datetime.utcnow()
    }
    # First see if the manifest already exists in the DB and update it if it does
    tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
    if tx_manifest:
        for key, value in manifest_data.iteritems():
            setattr(tx_manifest, key, value)
        App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
        tx_manifest.update()
    else:
        tx_manifest = TxManifest(**manifest_data)
        App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
        tx_manifest.insert()

    # Preprocess the files
    preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
    results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

    # Zip up the massaged files
    zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
    App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
    add_contents_to_zip(zip_filepath, preprocess_dir)
    App.logger.debug('finished.')

    # Upload the zipped file to the S3 bucket
    file_key = self.upload_zip_file(commit_id, zip_filepath)

    job = TxJob()
    job.job_id = self.get_unique_job_id()
    job.identifier = job.job_id
    job.user_name = user_name
    job.repo_name = repo_name
    job.commit_id = commit_id
    job.manifests_id = tx_manifest.id
    job.created_at = datetime.utcnow()
    job.user = user.username  # Username of the token, not necessarily the repo's owner
    job.input_format = rc.resource.file_ext
    job.resource_type = rc.resource.identifier
    job.source = self.source_url_base + "/" + file_key
    job.cdn_bucket = App.cdn_bucket
    job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
    job.output = 'https://{0}/{1}'.format(App.cdn_bucket, job.cdn_file)
    job.callback = App.api_url + '/client/callback'
    job.output_format = 'html'
    job.links = {
        "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
        "rel": "self",
        "method": "GET"
    }
    job.success = False

    converter = self.get_converter_module(job)
    linter = self.get_linter_module(job)

    if converter:
        job.convert_module = converter.name
        job.started_at = datetime.utcnow()
        job.expires_at = job.started_at + timedelta(days=1)
        job.eta = job.started_at + timedelta(minutes=5)
        job.status = 'started'
        job.message = 'Conversion started...'
        job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
    else:
        job.error_message('No converter was found to convert {0} from {1} to {2}'
                          .format(job.resource_type, job.input_format, job.output_format))
        job.message = 'No converter found'
        job.status = 'failed'

    if linter:
        job.lint_module = linter.name
    else:
        App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

    job.insert()

    # Get the S3 bucket/dir ready
    s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
    self.clear_commit_directory_in_cdn(s3_commit_key)

    # Create a build log
    build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                           pusher_username, repo_name, user_name)
    # Upload an initial build_log
    self.upload_build_log_to_s3(build_log_json, s3_commit_key)

    # Update the project.json file
    self.update_project_json(commit_id, job, repo_name, user_name)

    # Convert and lint
    if converter:
        if not preprocessor.is_multiple_jobs():
            self.send_request_to_converter(job, converter)
            if linter:
                extra_payload = {
                    's3_results_key': s3_commit_key
                }
                self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
        else:
            # -----------------------------
            # multiple book project
            # -----------------------------
            books = preprocessor.get_book_list()
            App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
            book_count = len(books)
            build_log_json['multiple'] = True
            build_log_json['build_logs'] = []
            for i in range(0, len(books)):
                book = books[i]
                App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))

                # Send job request to tx-manager
                if i == 0:
                    book_job = job  # use the original job created above for the first book
                    book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                else:
                    book_job = job.clone()  # copy the original job for this book's job
                    book_job.job_id = self.get_unique_job_id()
                    book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                    book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                    book_job.output = 'https://{0}/{1}'.format(App.cdn_bucket, book_job.cdn_file)
                    book_job.links = {
                        "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                        "rel": "self",
                        "method": "GET"
                    }
                    book_job.insert()

                book_job.source = self.build_multipart_source(file_key, book)
                book_job.update()

                book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url,
                                                       book_job, pusher_username, repo_name, user_name)
                if len(book) > 0:
                    part = str(i)
                    book_build_log['book'] = book
                    book_build_log['part'] = part
                build_log_json['build_logs'].append(book_build_log)
                self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                self.send_request_to_converter(book_job, converter)
                if linter:
                    extra_payload = {
                        'single_file': book,
                        's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                    }
                    self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

    remove_tree(self.base_temp_dir)  # cleanup
    return build_log_json

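# Worked example of the multipart identifier contract built above: the webhook
# creates 'job_id/part_count/part_id/book' and the converter/linter callbacks
# split it back apart (see process_callback below). The helper name and the
# concrete values here are illustrative only.
def _identifier_contract_example():
    identifier = '{0}/{1}/{2}/{3}'.format('1234abcd', 3, 0, '01-GEN')
    id_parts = identifier.split('/')
    assert len(id_parts) > 3  # how the callbacks detect a multipart job
    part_count, part_id, book = id_parts[1:4]
    assert (part_count, part_id, book) == ('3', '0', '01-GEN')
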
def close(self):
    """delete temp files"""
    remove_tree(self.download_dir)
    remove_tree(self.files_dir)
    remove_tree(self.output_dir)
    remove(self.output_zip_file)

def process_callback(self):
    job_id_parts = self.identifier.split('/')
    job_id = job_id_parts[0]
    self.job = TxJob.get(job_id)

    if not self.job:
        error = 'No job found for job_id = {0}, identifier = {1}'.format(job_id, self.identifier)
        App.logger.error(error)
        raise Exception(error)

    if len(job_id_parts) == 4:
        part_count, part_id, book = job_id_parts[1:]
        App.logger.debug('Multiple project, part {0} of {1}, converting book {2}'.
                         format(part_id, part_count, book))
        multiple_project = True
    else:
        App.logger.debug('Single project')
        part_id = None
        multiple_project = False

    self.job.ended_at = datetime.utcnow()
    self.job.success = self.success
    for message in self.log:
        self.job.log_message(message)
    for message in self.warnings:
        self.job.warnings_message(message)
    for message in self.errors:
        self.job.error_message(message)
    if len(self.errors):
        self.job.log_message('{0} function returned with errors.'.format(self.job.convert_module))
    elif len(self.warnings):
        self.job.log_message('{0} function returned with warnings.'.format(self.job.convert_module))
    else:
        self.job.log_message('{0} function returned successfully.'.format(self.job.convert_module))

    if not self.success or len(self.job.errors):
        self.job.success = False
        self.job.status = "failed"
        message = "Conversion failed"
        App.logger.debug("Conversion failed, success: {0}, errors: {1}".format(self.success, self.job.errors))
    elif len(self.job.warnings) > 0:
        self.job.success = True
        self.job.status = "warnings"
        message = "Conversion successful with warnings"
    else:
        self.job.success = True
        self.job.status = "success"
        message = "Conversion successful"

    self.job.message = message
    self.job.log_message(message)
    self.job.log_message('Finished job {0} at {1}'.format(self.job.job_id,
                                                          self.job.ended_at.strftime("%Y-%m-%dT%H:%M:%SZ")))

    s3_commit_key = 'u/{0}/{1}/{2}'.format(self.job.user_name, self.job.repo_name, self.job.commit_id)
    upload_key = s3_commit_key
    if multiple_project:
        upload_key += "/" + part_id

    App.logger.debug('Callback for commit {0}...'.format(s3_commit_key))

    # Download the ZIP file of the converted files
    converted_zip_url = self.job.output
    converted_zip_file = os.path.join(self.temp_dir, converted_zip_url.rpartition('/')[2])
    remove(converted_zip_file)  # make sure old file not present
    download_success = True
    App.logger.debug('Downloading converted zip file from {0}...'.format(converted_zip_url))
    try:
        download_file(converted_zip_url, converted_zip_file)
    except:
        download_success = False  # if multiple project we note fail and move on
        if not multiple_project:
            remove_tree(self.temp_dir)  # cleanup
        if self.job.errors is None:
            self.job.errors = []
        self.job.errors.append("Missing converted file: " + converted_zip_url)
    finally:
        App.logger.debug('download finished, success={0}'.format(str(download_success)))

    self.job.update()

    if download_success:
        # Unzip the archive
        unzip_dir = self.unzip_converted_files(converted_zip_file)

        # Upload all files to the cdn_bucket with the key of <user>/<repo_name>/<commit> of the repo
        self.upload_converted_files(upload_key, unzip_dir)

    if multiple_project:
        # Now download the existing build_log.json file, update it and upload it back to S3 as convert_log
        build_log_json = self.update_convert_log(s3_commit_key, part_id + "/")
        # mark current part as finished
        self.cdn_upload_contents({}, s3_commit_key + '/' + part_id + '/finished')
    else:  # single part conversion
        # Now download the existing build_log.json file, update it and upload it back to S3 as convert_log
        build_log_json = self.update_convert_log(s3_commit_key)
        self.cdn_upload_contents({}, s3_commit_key + '/finished')  # flag finished

    results = ClientLinterCallback.deploy_if_conversion_finished(s3_commit_key, self.identifier)
    if results:
        self.all_parts_completed = True
        build_log_json = results

    remove_tree(self.temp_dir)  # cleanup
    return build_log_json

def convert(self):
    App.logger.debug('Processing the Bible USFM files')

    # find the first directory that has usfm files.
    files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)
    convert_only_list = self.check_for_exclusive_convert()

    current_dir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
        template_html = template_file.read()

    for filename in files:
        if filename.endswith('.usfm'):
            base_name = os.path.basename(filename)
            if convert_only_list and (base_name not in convert_only_list):  # see if this is a file we are to convert
                continue

            msg = 'Converting Bible USFM file: {0}'.format(base_name)
            self.log.info(msg)
            App.logger.debug(msg)

            # Convert the USFM file
            scratch_dir = tempfile.mkdtemp(prefix='scratch_')
            copyfile(filename, os.path.join(scratch_dir, os.path.basename(filename)))
            filebase = os.path.splitext(os.path.basename(filename))[0]
            UsfmTransform.buildSingleHtml(scratch_dir, scratch_dir, filebase)
            html_filename = filebase + ".html"
            with codecs.open(os.path.join(scratch_dir, html_filename), 'r', 'utf-8-sig') as html_file:
                converted_html = html_file.read()

            # Inject the converted body into the page template
            template_soup = BeautifulSoup(template_html, 'html.parser')
            template_soup.head.title.string = self.resource.upper()
            converted_soup = BeautifulSoup(converted_html, 'html.parser')
            content_div = template_soup.find('div', id='content')
            content_div.clear()
            if converted_soup and converted_soup.body:
                content_div.append(converted_soup.body)
                content_div.body.unwrap()
            else:
                content_div.append('<div class="error">ERROR! NOT CONVERTED!</div>')

            output_file = os.path.join(self.output_dir, html_filename)
            write_file(output_file, unicode(template_soup))
            self.log.info('Converted {0} to {1}.'.format(os.path.basename(filename),
                                                         os.path.basename(html_filename)))
            remove_tree(scratch_dir)
        else:
            # Directly copy over files that are not USFM files
            try:
                output_file = os.path.join(self.output_dir, os.path.basename(filename))
                if not os.path.exists(output_file):
                    copyfile(filename, output_file)
            except:
                pass  # ignore files that cannot be copied

    self.log.info('Finished processing Bible USFM files.')
    return True

def tearDown(self):
    """Runs after each test."""
    # delete temp files
    remove_tree(self.out_dir)
    remove(self.out_zip_file)

def tearDown(self):
    """Runs after each test."""
    # delete temp files
    remove_tree(self.temp_dir)

def close(self):
    """delete temp files"""
    remove_tree(self.temp_dir)