def update_jobs_table(s3_results_key, build_log, output_dir):
    job_id = build_log['job_id']
    App.logger.debug('merging build_logs for job: ' + job_id)
    build_log['ended_at'] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
    job = TxJob.get(job_id)
    if job:
        job.status = build_log['status']
        job.log = build_log['log']
        job.warnings = build_log['warnings']
        job.errors = build_log['errors']
        job.message = build_log['message']
        job.success = build_log['success']
        job.ended_at = build_log['ended_at']

        # set overall status
        if len(job.errors):
            job.status = 'errors'
            job.success = False
        elif len(job.warnings):
            job.status = 'warnings'

        job.update()
    else:
        job_data = {'manifests_id': 0}  # set a default if not present
        for key in build_log:
            if hasattr(TxJob, key):
                job_data[key] = build_log[key]
        job = TxJob(**job_data)
        job.insert()

    # flag this part as done
    ClientLinterCallback.upload_build_log(build_log, 'merged.json', output_dir, s3_results_key)

    # update build_log to start deploy of this part
    ClientLinterCallback.upload_build_log(build_log, 'build_log.json', output_dir, s3_results_key, cache_time=600)
    return

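# For reference, a minimal sketch of the merged build_log dict that
# update_jobs_table() consumes. The field names are taken from the function
# above; the values are hypothetical examples, not real data.
example_build_log = {
    'job_id': 'a1b2c3',  # key used to look up the TxJob record
    'status': 'success',  # overridden to 'errors'/'warnings' below
    'log': ['Started job...'],
    'warnings': [],
    'errors': [],
    'message': 'Conversion successful',
    'success': True,
    'ended_at': '2018-01-01T00:00:00Z',
}
# The overall status is derived the same way update_jobs_table() derives it:
if example_build_log['errors']:
    example_build_log['status'], example_build_log['success'] = 'errors', False
elif example_build_log['warnings']:
    example_build_log['status'] = 'warnings'
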
def populate_tables(self):
    for idx in self.job_items:
        tx_job = TxJob(**self.job_items[idx])
        tx_job.insert()
    for idx in self.module_items:
        tx_module = TxModule(**self.module_items[idx])
        tx_module.insert()

def poll_until_all_jobs_finished(self, build_logs):
    job = None
    finished = []
    job_count = len(build_logs)
    polling_timeout = 5 * 60  # poll for up to 5 minutes for jobs to complete or error
    sleep_interval = 5  # how often to check for completion
    done = False
    start = time.time()
    end = start + polling_timeout
    while (time.time() < end) and not done:
        time.sleep(sleep_interval)  # delay before polling again
        for build_log in build_logs:  # check for completion of each part
            job_id = build_log['job_id']
            if job_id in finished:
                continue  # skip if job already finished
            job = TxJob.get(job_id)
            self.assertIsNotNone(job)
            App.logger.debug("job " + job_id + " status at " + str(elapsed_time(start)) + ":\n" + str(job.log))
            if job.ended_at is not None:
                finished.append(job_id)
                end = time.time() + polling_timeout  # reset timeout
                if len(finished) >= job_count:
                    done = True  # finished
                    break

    if len(finished) < job_count:
        for build_log in build_logs:  # report each part that never finished
            job_id = build_log['job_id']
            if job_id not in finished:
                self.warn("Timeout waiting for completion of job: " + job_id)

    return done, job

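# A sketch of how this helper would typically be driven from a multipart test.
# The job ids below are hypothetical; in the real tests they would come from
# the 'build_logs' list in the build_log_json returned by process_webhook().
def test_multipart_jobs_finish(self):
    build_logs = [{'job_id': 'a1b2c3'}, {'job_id': 'd4e5f6'}]
    done, last_job = self.poll_until_all_jobs_finished(build_logs)
    self.assertTrue(done)
    self.assertIsNotNone(last_job.ended_at)
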
def get_unique_job_id(self):
    """
    Generate a job_id that is not already in the jobs table.

    :return string:
    """
    job_id = hashlib.sha256(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f")).hexdigest()
    while TxJob.get(job_id):
        job_id = hashlib.sha256(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f")).hexdigest()
    return job_id

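# Caveat: hashlib.sha256() hashes bytes, so passing the strftime() string works
# on Python 2 (where str is bytes) but raises TypeError on Python 3. A
# hypothetical portable variant (not part of the original code) would encode
# the timestamp first:
import hashlib
from datetime import datetime

def make_candidate_job_id():
    """Hash the current timestamp, encoding it so this also runs on Python 3."""
    stamp = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f")
    return hashlib.sha256(stamp.encode('utf-8')).hexdigest()
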
def test_query_job(self):
    jobs = TxJob.query()
    App.logger.debug(jobs)
    self.assertEqual(jobs.count(), len(self.items))
    for job in jobs:
        print(job)
        self.assertEqual(job.identifier, self.items[job.job_id]['identifier'])

def list_jobs(self, data, must_be_authenticated=True):
    if must_be_authenticated:
        if 'gogs_user_token' not in data:
            raise Exception('"gogs_user_token" not given.')
        App.gogs_user_token = data['gogs_user_token']
        user = self.get_user(App.gogs_user_token)
        if not user:
            raise Exception('Invalid user_token. User not found.')
        data['user'] = user.username
        del data['gogs_user_token']
    return TxJob.query()

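# Illustrative usage based on the contract above. The token value is a
# placeholder, and tx_manager is assumed to be an instance of the class that
# defines list_jobs().
jobs = tx_manager.list_jobs({'gogs_user_token': 'abc123'})  # raises on a missing or invalid token
all_jobs = tx_manager.list_jobs({}, must_be_authenticated=False)  # skips the token check
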
def test_client_converter_callback_multiple_job_complete_error(self, mock_download_file):
    # given
    self.source_zip = os.path.join(self.resources_dir, "raw_sources/en-ulb.zip")
    identifier = 'job1/2/0/01-GEN.usfm'
    tx_job = TxJob.get('job1')
    tx_job.errors = ['conversion failed']
    tx_job.update()
    self.s3_results_key = 'u/tx-manager-test-data/en-ulb/22f3d09f7a/0'
    mock_cccb = self.mock_client_converter_callback(identifier, mock_download_file)
    self.generate_parts_completed(0, 2)
    expect_error = True

    # when
    results = mock_cccb.process_callback()

    # then
    self.validate_results(expect_error, results)

def test_process_webhook(self, mock_download_file):
    # given
    client_web_hook = self.setup_client_webhook_mock('kpb_mat_text_udb_repo', mock_download_file)
    expected_job_count = 1
    expected_error_count = 0

    # when
    results = client_web_hook.process_webhook()

    # then
    self.validateResults(results, expected_job_count, expected_error_count)

    # Check repo was added to manifest table
    repo_name = client_web_hook.commit_data['repository']['name']
    user_name = client_web_hook.commit_data['repository']['owner']['username']
    tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
    tx_job = TxJob.get(results['job_id'])
    self.assertEqual(tx_manifest.repo_name, client_web_hook.commit_data['repository']['name'])
    self.assertEqual(tx_manifest.resource_id, 'udb')
    self.assertEqual(tx_manifest.lang_code, 'kpb')
    self.assertEqual(tx_manifest.id, tx_job.manifests_id)

def poll_until_job_finished(self, job_id):
    success = False
    job = None
    polling_timeout = 5 * 60  # poll for up to 5 minutes for job to complete or error
    sleep_interval = 5  # how often to check for completion
    start = time.time()
    end = start + polling_timeout
    while time.time() < end:
        time.sleep(sleep_interval)
        job = TxJob.get(job_id)
        self.assertIsNotNone(job)
        elapsed_seconds = elapsed_time(start)
        App.logger.debug("job " + job_id + " status at " + str(elapsed_seconds) + ":\n" + str(job.log))
        if job.ended_at is not None:
            success = True
            break

    if not success:
        self.warn("Timeout waiting for completion of job: " + job_id)

    return success, job

def populate_table(self):
    for idx in self.items:
        tx_job = TxJob(**self.items[idx])
        tx_job.insert()

def process_webhook(self):
    # Check that we got commit data
    if not self.commit_data:
        raise Exception('No commit data from DCS was found in the Payload')

    # Check that the user token is valid
    if not App.gogs_user_token:
        raise Exception('DCS user token not given in Payload.')
    user = App.gogs_handler().get_user(App.gogs_user_token)
    if not user:
        raise Exception('Invalid DCS user token given in Payload')

    # Check that the URL to the DCS repo is valid
    if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
        raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

    # Check that the commit is on the repo's default branch, else quit
    try:
        commit_branch = self.commit_data['ref'].split('/')[2]
    except IndexError:
        raise Exception('Could not determine commit branch, exiting.')
    except KeyError:
        raise Exception('This does not appear to be a push, exiting.')
    if commit_branch != self.commit_data['repository']['default_branch']:
        raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

    # Get the commit_id, commit_url
    commit_id = self.commit_data['after']
    commit = None
    for commit in self.commit_data['commits']:
        if commit['id'] == commit_id:
            break
    commit_id = commit_id[:10]  # Only use the short form
    commit_url = commit['url']

    # Gather other details from the commit that we will note for the job(s)
    user_name = self.commit_data['repository']['owner']['username']
    repo_name = self.commit_data['repository']['name']
    compare_url = self.commit_data['compare_url']
    commit_message = commit['message']

    if 'pusher' in self.commit_data:
        pusher = self.commit_data['pusher']
    else:
        pusher = {'username': commit['author']['username']}
    pusher_username = pusher['username']

    # Download and unzip the repo files
    repo_dir = self.get_repo_files(commit_url, repo_name)

    # Get the resource container
    rc = RC(repo_dir, repo_name)

    # Save manifest to manifest table
    manifest_data = {
        'repo_name': repo_name,
        'user_name': user_name,
        'lang_code': rc.resource.language.identifier,
        'resource_id': rc.resource.identifier,
        'resource_type': rc.resource.type,
        'title': rc.resource.title,
        'manifest': json.dumps(rc.as_dict()),
        'last_updated': datetime.utcnow()
    }
    # First see if manifest already exists in DB and update it if it does
    tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
    if tx_manifest:
        for key, value in manifest_data.iteritems():
            setattr(tx_manifest, key, value)
        App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
        tx_manifest.update()
    else:
        tx_manifest = TxManifest(**manifest_data)
        App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
        tx_manifest.insert()

    # Preprocess the files
    preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
    results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

    # Zip up the massaged files
    zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
    App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
    add_contents_to_zip(zip_filepath, preprocess_dir)
    App.logger.debug('finished.')

    # Upload zipped file to the S3 bucket
    file_key = self.upload_zip_file(commit_id, zip_filepath)

    job = TxJob()
    job.job_id = self.get_unique_job_id()
    job.identifier = job.job_id
    job.user_name = user_name
    job.repo_name = repo_name
    job.commit_id = commit_id
    job.manifests_id = tx_manifest.id
    job.created_at = datetime.utcnow()
    job.user = user.username  # Username of the token, not necessarily the repo's owner
    job.input_format = rc.resource.file_ext
    job.resource_type = rc.resource.identifier
    job.source = self.source_url_base + "/" + file_key
    job.cdn_bucket = App.cdn_bucket
    job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
    job.output = 'http://{0}.s3-{1}.amazonaws.com/{2}'.format(App.cdn_bucket, App.aws_region_name, job.cdn_file)
    job.callback = App.api_url + '/client/callback'
    job.output_format = 'html'
    job.links = {
        "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
        "rel": "self",
        "method": "GET"
    }
    job.success = False

    converter = self.get_converter_module(job)
    linter = self.get_linter_module(job)

    if converter:
        job.convert_module = converter.name
        job.started_at = datetime.utcnow()
        job.expires_at = job.started_at + timedelta(days=1)
        job.eta = job.started_at + timedelta(minutes=5)
        job.status = 'started'
        job.message = 'Conversion started...'
        job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
    else:
        job.error_message('No converter was found to convert {0} from {1} to {2}'
                          .format(job.resource_type, job.input_format, job.output_format))
        job.message = 'No converter found'
        job.status = 'failed'

    if linter:
        job.lint_module = linter.name
    else:
        App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

    job.insert()

    # Get S3 bucket/dir ready
    s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
    self.clear_commit_directory_in_cdn(s3_commit_key)

    # Create a build log
    build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                           pusher_username, repo_name, user_name)
    # Upload an initial build_log
    self.upload_build_log_to_s3(build_log_json, s3_commit_key)

    # Update the project.json file
    self.update_project_json(commit_id, job, repo_name, user_name)

    # Convert and lint
    if converter:
        if not preprocessor.is_multiple_jobs():
            self.send_request_to_converter(job, converter)
            if linter:
                extra_payload = {
                    's3_results_key': s3_commit_key
                }
                self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
        else:
            # -----------------------------
            # multiple book project
            # -----------------------------
            books = preprocessor.get_book_list()
            App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
            book_count = len(books)
            build_log_json['multiple'] = True
            build_log_json['build_logs'] = []
            for i in range(0, len(books)):
                book = books[i]
                App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))
                # Send job request to tx-manager
                if i == 0:
                    book_job = job  # use the original job created above for the first book
                    book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                else:
                    book_job = job.clone()  # copy the original job for this book's job
                    book_job.job_id = self.get_unique_job_id()
                    book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                    book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                    book_job.output = 'http://{0}.s3-{1}.amazonaws.com/{2}'.format(App.cdn_bucket,
                                                                                   App.aws_region_name,
                                                                                   book_job.cdn_file)
                    book_job.links = {
                        "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                        "rel": "self",
                        "method": "GET"
                    }
                    book_job.insert()

                book_job.source = self.build_multipart_source(file_key, book)
                book_job.update()
                book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url,
                                                       book_job, pusher_username, repo_name, user_name)
                if len(book) > 0:
                    part = str(i)
                    book_build_log['book'] = book
                    book_build_log['part'] = part
                build_log_json['build_logs'].append(book_build_log)
                self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                self.send_request_to_converter(book_job, converter)
                if linter:
                    extra_payload = {
                        'single_file': book,
                        's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                    }
                    self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

    remove_tree(self.base_temp_dir)  # cleanup
    return build_log_json

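# The multipart identifier composed above, '{job_id}/{book_count}/{part_index}/{book}',
# is the same format process_callback() later splits apart. A small round-trip
# sketch with hypothetical values:
identifier = '{0}/{1}/{2}/{3}'.format('a1b2c3', 3, 1, '02-EXO.usfm')
parts = identifier.split('/')
if len(parts) == 4:  # multiple-book job; a bare job_id means a single project
    part_count, part_index, book = parts[1:]
    assert (part_count, part_index, book) == ('3', '1', '02-EXO.usfm')
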
def generate_dashboard(self, max_failures=MAX_FAILURES):
    """
    Generate page with metrics indicating configuration of tx-manager.

    :param int max_failures:
    """
    App.logger.debug("Start: generateDashboard")

    dashboard = {
        'title': 'tX-Manager Dashboard',
        'body': 'No modules found'
    }

    items = sorted(TxModule().query(), key=lambda k: k.name)
    if items and len(items):
        module_names = []
        for item in items:
            module_names.append(item.name)

        App.logger.debug("Found: " + str(len(items)) + " item[s] in tx-module")
        App.logger.debug("Reading from Jobs table")
        registered_jobs = self.list_jobs({"convert_module": {"condition": "is_in", "value": module_names}},
                                         False)
        total_job_count = TxJob.query().count()
        registered_job_count = registered_jobs.count()
        App.logger.debug("Finished reading from Jobs table")

        # sanity check since AWS can be slow to update job count reported in table (every 6 hours)
        if registered_job_count > total_job_count:
            total_job_count = registered_job_count

        body = BeautifulSoup('<h1>TX-Manager Dashboard - {0}</h1>'
                             '<h2>Module Attributes</h2><br><table id="status"></table>'.format(datetime.now()),
                             'html.parser')
        for item in items:
            module_name = item.name
            App.logger.debug(module_name)
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '"><td class="hdr" colspan="2">' + str(module_name) + '</td></tr>',
                'html.parser'))

            self.get_jobs_counts_for_module(registered_jobs, module_name)

            # TBD the following code almost walks the db record, replacing the next 11 lines
            # for attr, val in item:
            #     if (attr != 'name') and (len(attr) > 0):
            #         rec += '  <tr><td class="lbl">' + attr.replace("_", " ").title() + ':</td><td>' + "lst(val)" + "</td></tr>\n"
            # rec += '<tr><td colspan="2"></td></tr>'

            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-type" class="module-type"><td class="lbl">Type:</td><td>' +
                str(item.type) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-input" class="module-input"><td class="lbl">Input Format:</td><td>' +
                json.dumps(item.input_format) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-output" class="module-output">' +
                '<td class="lbl">Output Format:</td><td>' + json.dumps(item.output_format) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-resource" class="module-resource"><td class="lbl">Resource Types:</td>' +
                '<td>' + json.dumps(item.resource_types) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-version" class="module-version"><td class="lbl">Version:</td><td>' +
                str(item.version) + '</td></tr>',
                'html.parser'))

            if len(item.options) > 0:
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-options" class="module-options">' +
                    '<td class="lbl">Options:</td><td>' + json.dumps(item.options) + '</td></tr>',
                    'html.parser'))
            if len(item.private_links) > 0:
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-private-links" class="module-private-links">' +
                    '<td class="lbl">Private Links:</td><td>' + json.dumps(item.private_links) + '</td></tr>',
                    'html.parser'))
            if len(item.public_links) > 0:
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-public-links" class="module-public-links">' +
                    '<td class="lbl">Public Links:</td><td>' + json.dumps(item.public_links) + '</td></tr>',
                    'html.parser'))

            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-job-success" class="module-public-links">' +
                '<td class="lbl">Job Successes:</td><td>' + str(self.jobs_success) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-job-warning" class="module-public-links">' +
                '<td class="lbl">Job Warnings:</td><td>' + str(self.jobs_warnings) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-job-failure" class="module-public-links">' +
                '<td class="lbl">Job Failures:</td><td>' + str(self.jobs_failures) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="' + module_name + '-job-total" class="module-public-links">' +
                '<td class="lbl">Jobs Total:</td><td>' + str(self.jobs_total) + '</td></tr>',
                'html.parser'))

        self.get_jobs_counts(registered_jobs)
        body.table.append(BeautifulSoup(
            '<tr id="totals"><td class="hdr" colspan="2">Total Jobs</td></tr>',
            'html.parser'))
        body.table.append(BeautifulSoup(
            '<tr id="totals-job-success" class="module-public-links"><td class="lbl">Success:</td><td>' +
            str(self.jobs_success) + '</td></tr>',
            'html.parser'))
        body.table.append(BeautifulSoup(
            '<tr id="totals-job-warning" class="module-public-links"><td class="lbl">Warnings:</td><td>' +
            str(self.jobs_warnings) + '</td></tr>',
            'html.parser'))
        body.table.append(BeautifulSoup(
            '<tr id="totals-job-failure" class="module-public-links"><td class="lbl">Failures:</td><td>' +
            str(self.jobs_failures) + '</td></tr>',
            'html.parser'))
        body.table.append(BeautifulSoup(
            '<tr id="totals-job-unregistered" class="module-public-links"><td class="lbl">Unregistered:</td><td>' +
            str(total_job_count - self.jobs_total) + '</td></tr>',
            'html.parser'))
        body.table.append(BeautifulSoup(
            '<tr id="totals-job-total" class="module-public-links"><td class="lbl">Total:</td><td>' +
            str(total_job_count) + '</td></tr>',
            'html.parser'))

        # build job failures table
        job_failures = self.get_job_failures(registered_jobs, max_failures)
        body.append(BeautifulSoup('<h2>Failed Jobs</h2>', 'html.parser'))
        failure_table = BeautifulSoup('<table id="failed" cellpadding="4" border="1" ' +
                                      'style="border-collapse:collapse"></table>', 'html.parser')
        failure_table.table.append(BeautifulSoup('''
            <tr id="header">
                <th class="hdr">Time</th>
                <th class="hdr">Errors</th>
                <th class="hdr">Repo</th>
                <th class="hdr">PreConvert</th>
                <th class="hdr">Converted</th>
                <th class="hdr">Destination</th>''', 'html.parser'))

        gogs_url = App.gogs_url
        if gogs_url is None:
            gogs_url = 'https://git.door43.org'

        for i in range(0, len(job_failures)):
            item = job_failures[i]
            try:
                identifier = item.identifier
                user_name, repo_name, commit_id = identifier.split('/')[:3]
                source_sub_path = '{0}/{1}'.format(user_name, repo_name)
                cdn_bucket = item.cdn_bucket
                destination_url = 'https://{0}/u/{1}/{2}/{3}/build_log.json'.format(cdn_bucket, user_name,
                                                                                    repo_name, commit_id)
                repo_url = gogs_url + "/" + source_sub_path
                preconverted_url = item.source
                converted_url = item.output
                failure_table.table.append(BeautifulSoup(
                    '<tr id="failure-' + str(i) + '" class="module-job-id">' +
                    '<td>' + item.created_at.strftime("%Y-%m-%dT%H:%M:%SZ") + '</td>' +
                    '<td>' + ','.join(item.errors) + '</td>' +
                    '<td><a href="' + repo_url + '">' + source_sub_path + '</a></td>' +
                    '<td><a href="' + preconverted_url + '">' + preconverted_url.rsplit('/', 1)[1] + '</a></td>' +
                    '<td><a href="' + converted_url + '">' + item.job_id + '.zip</a></td>' +
                    '<td><a href="' + destination_url + '">Build Log</a></td>' +
                    '</tr>',
                    'html.parser'))
            except Exception as e:
                pass

        body.append(failure_table)
        self.build_language_popularity_tables(body, max_failures)
        body_html = body.prettify('UTF-8')
        dashboard['body'] = body_html

        # save to cdn in case HTTP connection times out
        try:
            self.temp_dir = tempfile.mkdtemp(suffix="", prefix="dashboard_")
            temp_file = os.path.join(self.temp_dir, "index.html")
            file_utils.write_file(temp_file, body_html)
            cdn_handler = App.cdn_s3_handler()
            cdn_handler.upload_file(temp_file, 'dashboard/index.html')
        except Exception as e:
            App.logger.debug("Could not save dashboard: " + str(e))
    else:
        App.logger.debug("No modules found.")

    App.db().close()
    return dashboard

def process_callback(self):
    job_id_parts = self.identifier.split('/')
    job_id = job_id_parts[0]
    self.job = TxJob.get(job_id)

    if not self.job:
        error = 'No job found for job_id = {0}, identifier = {1}'.format(job_id, self.identifier)
        App.logger.error(error)
        raise Exception(error)

    if len(job_id_parts) == 4:
        part_count, part_id, book = job_id_parts[1:]
        App.logger.debug('Multiple project, part {0} of {1}, converting book {2}'
                         .format(part_id, part_count, book))
        multiple_project = True
    else:
        App.logger.debug('Single project')
        part_id = None
        multiple_project = False

    self.job.ended_at = datetime.utcnow()
    self.job.success = self.success
    for message in self.log:
        self.job.log_message(message)
    for message in self.warnings:
        self.job.warnings_message(message)
    for message in self.errors:
        self.job.error_message(message)

    if len(self.errors):
        self.job.log_message('{0} function returned with errors.'.format(self.job.convert_module))
    elif len(self.warnings):
        self.job.log_message('{0} function returned with warnings.'.format(self.job.convert_module))
    else:
        self.job.log_message('{0} function returned successfully.'.format(self.job.convert_module))

    if not self.success or len(self.job.errors):
        self.job.success = False
        self.job.status = "failed"
        message = "Conversion failed"
        App.logger.debug("Conversion failed, success: {0}, errors: {1}".format(self.success, self.job.errors))
    elif len(self.job.warnings) > 0:
        self.job.success = True
        self.job.status = "warnings"
        message = "Conversion successful with warnings"
    else:
        self.job.success = True
        self.job.status = "success"
        message = "Conversion successful"

    self.job.message = message
    self.job.log_message(message)
    self.job.log_message('Finished job {0} at {1}'.format(self.job.job_id,
                                                          self.job.ended_at.strftime("%Y-%m-%dT%H:%M:%SZ")))

    s3_commit_key = 'u/{0}/{1}/{2}'.format(self.job.user_name, self.job.repo_name, self.job.commit_id)
    upload_key = s3_commit_key
    if multiple_project:
        upload_key += "/" + part_id

    App.logger.debug('Callback for commit {0}...'.format(s3_commit_key))

    # Download the ZIP file of the converted files
    converted_zip_url = self.job.output
    converted_zip_file = os.path.join(self.temp_dir, converted_zip_url.rpartition('/')[2])
    remove(converted_zip_file)  # make sure old file not present
    download_success = True
    App.logger.debug('Downloading converted zip file from {0}...'.format(converted_zip_url))
    try:
        download_file(converted_zip_url, converted_zip_file)
    except:
        download_success = False  # if multiple project we note fail and move on
        if not multiple_project:
            remove_tree(self.temp_dir)  # cleanup
        if self.job.errors is None:
            self.job.errors = []
        self.job.errors.append("Missing converted file: " + converted_zip_url)
    finally:
        App.logger.debug('download finished, success={0}'.format(str(download_success)))

    self.job.update()

    if download_success:
        # Unzip the archive
        unzip_dir = self.unzip_converted_files(converted_zip_file)

        # Upload all files to the cdn_bucket with the key of <user>/<repo_name>/<commit> of the repo
        self.upload_converted_files(upload_key, unzip_dir)

    if multiple_project:
        # Now download the existing build_log.json file, update it and upload it back to S3 as convert_log
        build_log_json = self.update_convert_log(s3_commit_key, part_id + "/")

        # mark current part as finished
        self.cdn_upload_contents({}, s3_commit_key + '/' + part_id + '/finished')
    else:  # single part conversion
        # Now download the existing build_log.json file, update it and upload it back to S3 as convert_log
        build_log_json = self.update_convert_log(s3_commit_key)
        self.cdn_upload_contents({}, s3_commit_key + '/finished')  # flag finished

    results = ClientLinterCallback.deploy_if_conversion_finished(s3_commit_key, self.identifier)
    if results:
        self.all_parts_completed = True
        build_log_json = results

    remove_tree(self.temp_dir)  # cleanup
    return build_log_json

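# deploy_if_conversion_finished() presumably keys off the zero-byte 'finished'
# markers uploaded above ('<s3_commit_key>/<part_id>/finished' per part, or
# '<s3_commit_key>/finished' for a single-part job). A rough sketch of such a
# check, using plain boto3 rather than the project's cdn handler; the function
# name and the bucket argument are hypothetical.
import boto3
from botocore.exceptions import ClientError

def all_parts_finished(bucket, s3_commit_key, part_count):
    """Return True only if every part has written its 'finished' marker."""
    s3 = boto3.client('s3')
    for part_id in range(part_count):
        key = '{0}/{1}/finished'.format(s3_commit_key, part_id)
        try:
            s3.head_object(Bucket=bucket, Key=key)  # raises if the marker is absent
        except ClientError:
            return False
    return True
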
def test_get_converter_module(self):
    job = TxJob(**self.job_data)
    cw = ClientWebhook()
    converter = cw.get_converter_module(job)
    self.assertIsNotNone(converter)
    self.assertEqual(converter.name, 'md2html')

def test_get_linter_module(self):
    job = TxJob(**self.job_data)
    cw = ClientWebhook()
    linter = cw.get_linter_module(job)
    self.assertIsNotNone(linter)
    self.assertEqual(linter.name, 'obs')

def process_webhook(self):
    # Check that we got commit data
    if not self.commit_data:
        raise Exception('No commit data from DCS was found in the Payload')

    # Check that the user token is valid
    if not App.gogs_user_token:
        raise Exception('DCS user token not given in Payload.')
    user = App.gogs_handler().get_user(App.gogs_user_token)
    if not user:
        raise Exception('Invalid DCS user token given in Payload')

    # Check that the URL to the DCS repo is valid
    if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
        raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

    # Check that the commit is on the repo's default branch, else quit
    try:
        commit_branch = self.commit_data['ref'].split('/')[2]
    except IndexError:
        raise Exception('Could not determine commit branch, exiting.')
    except KeyError:
        raise Exception('This does not appear to be a push, exiting.')
    if commit_branch != self.commit_data['repository']['default_branch']:
        raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

    # Get the commit_id, commit_url
    commit_id = self.commit_data['after']
    commit = None
    for commit in self.commit_data['commits']:
        if commit['id'] == commit_id:
            break
    commit_id = commit_id[:10]  # Only use the short form
    commit_url = commit['url']

    # Gather other details from the commit that we will note for the job(s)
    user_name = self.commit_data['repository']['owner']['username']
    repo_name = self.commit_data['repository']['name']
    compare_url = self.commit_data['compare_url']
    commit_message = commit['message']

    if 'pusher' in self.commit_data:
        pusher = self.commit_data['pusher']
    else:
        pusher = {'username': commit['author']['username']}
    pusher_username = pusher['username']

    # Download and unzip the repo files
    repo_dir = self.get_repo_files(commit_url, repo_name)

    # Get the resource container
    rc = RC(repo_dir, repo_name)

    # Save manifest to manifest table
    manifest_data = {
        'repo_name': repo_name,
        'user_name': user_name,
        'lang_code': rc.resource.language.identifier,
        'resource_id': rc.resource.identifier,
        'resource_type': rc.resource.type,
        'title': rc.resource.title,
        'manifest': json.dumps(rc.as_dict()),
        'last_updated': datetime.utcnow()
    }
    # First see if manifest already exists in DB and update it if it does
    tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
    if tx_manifest:
        for key, value in manifest_data.iteritems():
            setattr(tx_manifest, key, value)
        App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
        tx_manifest.update()
    else:
        tx_manifest = TxManifest(**manifest_data)
        App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
        tx_manifest.insert()

    # Preprocess the files
    preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
    results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

    # Zip up the massaged files
    zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
    App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
    add_contents_to_zip(zip_filepath, preprocess_dir)
    App.logger.debug('finished.')

    # Upload zipped file to the S3 bucket
    file_key = self.upload_zip_file(commit_id, zip_filepath)

    job = TxJob()
    job.job_id = self.get_unique_job_id()
    job.identifier = job.job_id
    job.user_name = user_name
    job.repo_name = repo_name
    job.commit_id = commit_id
    job.manifests_id = tx_manifest.id
    job.created_at = datetime.utcnow()
    job.user = user.username  # Username of the token, not necessarily the repo's owner
    job.input_format = rc.resource.file_ext
    job.resource_type = rc.resource.identifier
    job.source = self.source_url_base + "/" + file_key
    job.cdn_bucket = App.cdn_bucket
    job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
    job.output = 'https://{0}/{1}'.format(App.cdn_bucket, job.cdn_file)
    job.callback = App.api_url + '/client/callback'
    job.output_format = 'html'
    job.links = {
        "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
        "rel": "self",
        "method": "GET"
    }
    job.success = False

    converter = self.get_converter_module(job)
    linter = self.get_linter_module(job)

    if converter:
        job.convert_module = converter.name
        job.started_at = datetime.utcnow()
        job.expires_at = job.started_at + timedelta(days=1)
        job.eta = job.started_at + timedelta(minutes=5)
        job.status = 'started'
        job.message = 'Conversion started...'
        job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
    else:
        job.error_message('No converter was found to convert {0} from {1} to {2}'
                          .format(job.resource_type, job.input_format, job.output_format))
        job.message = 'No converter found'
        job.status = 'failed'

    if linter:
        job.lint_module = linter.name
    else:
        App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

    job.insert()

    # Get S3 bucket/dir ready
    s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
    self.clear_commit_directory_in_cdn(s3_commit_key)

    # Create a build log
    build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                           pusher_username, repo_name, user_name)
    # Upload an initial build_log
    self.upload_build_log_to_s3(build_log_json, s3_commit_key)

    # Update the project.json file
    self.update_project_json(commit_id, job, repo_name, user_name)

    # Convert and lint
    if converter:
        if not preprocessor.is_multiple_jobs():
            self.send_request_to_converter(job, converter)
            if linter:
                extra_payload = {
                    's3_results_key': s3_commit_key
                }
                self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
        else:
            # -----------------------------
            # multiple book project
            # -----------------------------
            books = preprocessor.get_book_list()
            App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
            book_count = len(books)
            build_log_json['multiple'] = True
            build_log_json['build_logs'] = []
            for i in range(0, len(books)):
                book = books[i]
                App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))
                # Send job request to tx-manager
                if i == 0:
                    book_job = job  # use the original job created above for the first book
                    book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                else:
                    book_job = job.clone()  # copy the original job for this book's job
                    book_job.job_id = self.get_unique_job_id()
                    book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                    book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                    book_job.output = 'https://{0}/{1}'.format(App.cdn_bucket, book_job.cdn_file)
                    book_job.links = {
                        "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                        "rel": "self",
                        "method": "GET"
                    }
                    book_job.insert()

                book_job.source = self.build_multipart_source(file_key, book)
                book_job.update()
                book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url,
                                                       book_job, pusher_username, repo_name, user_name)
                if len(book) > 0:
                    part = str(i)
                    book_build_log['book'] = book
                    book_build_log['part'] = part
                build_log_json['build_logs'].append(book_build_log)
                self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                self.send_request_to_converter(book_job, converter)
                if linter:
                    extra_payload = {
                        'single_file': book,
                        's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                    }
                    self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

    remove_tree(self.base_temp_dir)  # cleanup
    return build_log_json

def test_update_job(self):
    job = TxJob.get(self.items['job3']['job_id'])
    job.status = 'finished'
    job.update()
    job = TxJob.get(self.items['job3']['job_id'])
    self.assertEqual(job.status, 'finished')

def test_delete_job(self):
    job = TxJob.get(self.items['job1']['job_id'])
    self.assertIsNotNone(job)
    job.delete()
    job = TxJob.get(self.items['job1']['job_id'])
    self.assertIsNone(job)

def test_load_job(self):
    # Test loading by just giving it the job_id in the constructor
    job = TxJob.get('job1')
    self.assertEqual(job.identifier, self.items['job1']['identifier'])