    @staticmethod
    def update_jobs_table(s3_results_key, build_log, output_dir):
        job_id = build_log['job_id']
        App.logger.debug('Merging build_logs for job: ' + job_id)
        build_log['ended_at'] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
        job = TxJob.get(job_id)
        if job:
            job.status = build_log['status']
            job.log = build_log['log']
            job.warnings = build_log['warnings']
            job.errors = build_log['errors']
            job.message = build_log['message']
            job.success = build_log['success']
            job.ended_at = build_log['ended_at']

            # set overall status
            if len(job.errors):
                job.status = 'errors'
                job.success = False
            elif len(job.warnings):
                job.status = 'warnings'

            job.update()
        else:
            job_data = {'manifests_id': 0}  # set a default if not present
            for key in build_log:
                if hasattr(TxJob, key):
                    job_data[key] = build_log[key]
            job = TxJob(**job_data)
            job.insert()

        # flag this part as done
        ClientLinterCallback.upload_build_log(build_log, 'merged.json', output_dir, s3_results_key)
        # update build_log to start deploy of this part
        ClientLinterCallback.upload_build_log(build_log, 'build_log.json', output_dir, s3_results_key, cache_time=600)
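For reference, a hypothetical build_log payload carrying the keys this function reads ('ended_at' is stamped on entry; all values here are illustrative):

build_log = {
    'job_id': 'abc123',  # hypothetical id
    'status': 'success',
    'success': True,
    'message': 'Conversion successful',
    'log': ['Started job abc123', 'Finished job abc123'],
    'warnings': [],
    'errors': [],
}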
    def populate_tables(self):
        for idx in self.job_items:
            tx_job = TxJob(**self.job_items[idx])
            tx_job.insert()

        for idx in self.module_items:
            tx_module = TxModule(**self.module_items[idx])
            tx_module.insert()
    def poll_until_all_jobs_finished(self, build_logs):
        job = None
        finished = []
        job_count = len(build_logs)
        polling_timeout = 5 * 60  # poll for up to 5 minutes for job to complete or error
        sleep_interval = 5  # how often to check for completion
        done = False
        start = time.time()
        end = start + polling_timeout
        while (time.time() < end) and not done:
            time.sleep(sleep_interval)  # delay before polling again
            for build_log in build_logs:  # check for completion of each part
                job_id = build_log['job_id']
                if job_id in finished:
                    continue  # skip if job already finished

                job = TxJob.get(job_id)
                self.assertIsNotNone(job)
                App.logger.debug("job " + job_id + " status at " + str(elapsed_time(start)) + ":\n" + str(job.log))

                if job.ended_at is not None:
                    finished.append(job_id)
                    end = time.time() + polling_timeout  # reset timeout
                    if len(finished) >= job_count:
                        done = True  # finished
                        break

        if len(finished) < job_count:
            for build_log in build_logs:  # check for completion of each part
                job_id = build_log['job_id']
                if job_id not in finished:
                    self.warn("Timeout waiting for start on job: " + job_id)

        return done, job
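The elapsed_time helper logged above is not defined anywhere in this listing; a minimal sketch consistent with how it is called, assuming it reports whole seconds since a time.time() start:

import time

def elapsed_time(start_time):
    # Seconds elapsed since start_time (a time.time() timestamp).
    return int(time.time() - start_time)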
Example #4
    def get_unique_job_id(self):
        """
        :return: a unique job_id string not already in the jobs table
        """
        job_id = hashlib.sha256(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f").encode('utf-8')).hexdigest()
        while TxJob.get(job_id):
            job_id = hashlib.sha256(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f").encode('utf-8')).hexdigest()
        return job_id
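Hashing a microsecond timestamp can still collide if two requests land in the same microsecond, which is why the loop re-checks the table. A common alternative (not what this code does) is a random UUID, which needs no read-back loop:

import uuid

def get_random_job_id():
    # 32 hex characters, collision-resistant without querying the jobs table.
    return uuid.uuid4().hex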
Example #6
    def test_query_job(self):
        jobs = TxJob.query()
        App.logger.debug(jobs)
        self.assertEqual(jobs.count(), len(self.items))
        for job in jobs:
            print(job)
            self.assertEqual(job.identifier,
                             self.items[job.job_id]['identifier'])
Example #7
    def list_jobs(self, data, must_be_authenticated=True):
        if must_be_authenticated:
            if 'gogs_user_token' not in data:
                raise Exception('"gogs_user_token" not given.')
            App.gogs_user_token = data['gogs_user_token']
            user = self.get_user(App.gogs_user_token)
            if not user:
                raise Exception('Invalid user_token. User not found.')
            data['user'] = user.username
            del data['gogs_user_token']
        return TxJob.query()
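A hypothetical authenticated call (the instance name and token are placeholders). Note that, as written, the method validates and strips the token but still returns an unfiltered TxJob.query():

jobs = manager.list_jobs({'gogs_user_token': 'abc123'})  # hypothetical instance/token
for job in jobs:
    App.logger.debug(job.job_id + ': ' + str(job.status))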
    def test_client_converter_callback_multiple_job_complete_error(self, mock_download_file):
        # given
        self.source_zip = os.path.join(self.resources_dir, "raw_sources/en-ulb.zip")
        identifier = 'job1/2/0/01-GEN.usfm'
        tx_job = TxJob.get('job1')
        tx_job.errors = ['conversion failed']
        tx_job.update()
        self.s3_results_key = 'u/tx-manager-test-data/en-ulb/22f3d09f7a/0'
        mock_cccb = self.mock_client_converter_callback(identifier, mock_download_file)
        self.generate_parts_completed(0, 2)
        expect_error = True

        # when
        results = mock_cccb.process_callback()

        # then
        self.validate_results(expect_error, results)
    def test_process_webhook(self, mock_download_file):
        # given
        client_web_hook = self.setup_client_webhook_mock('kpb_mat_text_udb_repo', mock_download_file)
        expected_job_count = 1
        expected_error_count = 0

        # when
        results = client_web_hook.process_webhook()

        # then
        self.validateResults(results, expected_job_count, expected_error_count)

        # Check repo was added to manifest table
        repo_name = client_web_hook.commit_data['repository']['name']
        user_name = client_web_hook.commit_data['repository']['owner']['username']
        tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
        tx_job = TxJob.get(results['job_id'])
        self.assertEqual(tx_manifest.repo_name, client_web_hook.commit_data['repository']['name'])
        self.assertEqual(tx_manifest.resource_id, 'udb')
        self.assertEqual(tx_manifest.lang_code, 'kpb')
        self.assertEqual(tx_manifest.id, tx_job.manifests_id)
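The commit_data consumed here and by process_webhook further below has roughly this shape; all values are hypothetical:

commit_data = {
    'ref': 'refs/heads/master',
    'after': '22f3d09f7a83b6b8e5f1a0c9',  # full commit sha (hypothetical)
    'compare_url': 'https://git.door43.org/txuser/en-ulb/compare/old...new',
    'commits': [{
        'id': '22f3d09f7a83b6b8e5f1a0c9',
        'url': 'https://git.door43.org/txuser/en-ulb/commit/22f3d09f7a',
        'message': 'Update 01-GEN.usfm',
        'author': {'username': 'txuser'},
    }],
    'pusher': {'username': 'txuser'},
    'repository': {
        'name': 'en-ulb',
        'owner': {'username': 'txuser'},
        'html_url': 'https://git.door43.org/txuser/en-ulb',
        'default_branch': 'master',
    },
}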
    def poll_until_job_finished(self, job_id):
        success = False
        job = None
        polling_timeout = 5 * 60  # poll for up to 5 minutes for job to complete or error
        sleep_interval = 5  # how often to check for completion
        start = time.time()
        end = start + polling_timeout
        while time.time() < end:
            time.sleep(sleep_interval)
            job = TxJob.get(job_id)
            self.assertIsNotNone(job)
            elapsed_seconds = elapsed_time(start)
            App.logger.debug("job " + job_id + " status at " + str(elapsed_seconds) + ":\n" + str(job.log))

            if job.ended_at is not None:
                success = True
                break

        if not success:
            self.warn("Timeout Waiting for start on job: " + job_id)

        return success, job
    def populate_table(self):
        for idx in self.items:
            tx_job = TxJob(**self.items[idx])
            tx_job.insert()
Example #16
    def process_webhook(self):
        # Check that we got commit data
        if not self.commit_data:
            raise Exception('No commit data from DCS was found in the Payload')

        # Check that the user token is valid
        if not App.gogs_user_token:
            raise Exception('DCS user token not given in Payload.')
        user = App.gogs_handler().get_user(App.gogs_user_token)
        if not user:
            raise Exception('Invalid DCS user token given in Payload')

        # Check that the URL to the DCS repo is valid
        if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
            raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

        # Check that commit is on repo's default branch, else quit
        try:
            commit_branch = self.commit_data['ref'].split('/')[2]
        except IndexError:
            raise Exception('Could not determine commit branch, exiting.')
        except KeyError:
            raise Exception('This does not appear to be a push, exiting.')
        if commit_branch != self.commit_data['repository']['default_branch']:
            raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

        # Get the commit_id, commit_url
        commit_id = self.commit_data['after']
        commit = None
        for commit in self.commit_data['commits']:
            if commit['id'] == commit_id:
                break
        commit_id = commit_id[:10]  # Only use the short form
        commit_url = commit['url']

        # Gather other details from the commit that we will note for the job(s)
        user_name = self.commit_data['repository']['owner']['username']
        repo_name = self.commit_data['repository']['name']
        compare_url = self.commit_data['compare_url']
        commit_message = commit['message']

        if 'pusher' in self.commit_data:
            pusher = self.commit_data['pusher']
        else:
            pusher = {'username': commit['author']['username']}
        pusher_username = pusher['username']

        # Download and unzip the repo files
        repo_dir = self.get_repo_files(commit_url, repo_name)

        # Get the resource container
        rc = RC(repo_dir, repo_name)

        # Save manifest to manifest table
        manifest_data = {
            'repo_name': repo_name,
            'user_name': user_name,
            'lang_code': rc.resource.language.identifier,
            'resource_id': rc.resource.identifier,
            'resource_type': rc.resource.type,
            'title': rc.resource.title,
            'manifest': json.dumps(rc.as_dict()),
            'last_updated': datetime.utcnow()
        }
        # First see if manifest already exists in DB and update it if it is
        tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
        if tx_manifest:
            for key, value in manifest_data.items():
                setattr(tx_manifest, key, value)
            App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
            tx_manifest.update()
        else:
            tx_manifest = TxManifest(**manifest_data)
            App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
            tx_manifest.insert()

        # Preprocess the files
        preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
        results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

        # Zip up the massaged files
        zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
        App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
        add_contents_to_zip(zip_filepath, preprocess_dir)
        App.logger.debug('finished.')

        # Upload zipped file to the S3 bucket
        file_key = self.upload_zip_file(commit_id, zip_filepath)

        job = TxJob()
        job.job_id = self.get_unique_job_id()
        job.identifier = job.job_id
        job.user_name = user_name
        job.repo_name = repo_name
        job.commit_id = commit_id
        job.manifests_id = tx_manifest.id
        job.created_at = datetime.utcnow()
        job.user = user.username  # Username of the token, not necessarily the repo's owner
        job.input_format = rc.resource.file_ext
        job.resource_type = rc.resource.identifier
        job.source = self.source_url_base + "/" + file_key
        job.cdn_bucket = App.cdn_bucket
        job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
        job.output = 'http://{0}.s3-{1}.amazonaws.com/{2}'.format(App.cdn_bucket, App.aws_region_name, job.cdn_file)
        job.callback = App.api_url + '/client/callback'
        job.output_format = 'html'
        job.links = {
            "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
            "rel": "self",
            "method": "GET"
        }
        job.success = False

        converter = self.get_converter_module(job)
        linter = self.get_linter_module(job)

        if converter:
            job.convert_module = converter.name
            job.started_at = datetime.utcnow()
            job.expires_at = job.started_at + timedelta(days=1)
            job.eta = job.started_at + timedelta(minutes=5)
            job.status = 'started'
            job.message = 'Conversion started...'
            job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
        else:
            job.error_message('No converter was found to convert {0} from {1} to {2}'.format(job.resource_type,
                                                                                             job.input_format,
                                                                                             job.output_format))
            job.message = 'No converter found'
            job.status = 'failed'

        if linter:
            job.lint_module = linter.name
        else:
            App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

        job.insert()

        # Get S3 bucket/dir ready
        s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
        self.clear_commit_directory_in_cdn(s3_commit_key)

        # Create a build log
        build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                               pusher_username, repo_name, user_name)
        # Upload an initial build_log
        self.upload_build_log_to_s3(build_log_json, s3_commit_key)

        # Update the project.json file
        self.update_project_json(commit_id, job, repo_name, user_name)

        # Convert and lint
        if converter:
            if not preprocessor.is_multiple_jobs():
                self.send_request_to_converter(job, converter)
                if linter:
                    extra_payload = {
                        's3_results_key': s3_commit_key
                    }
                    self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
            else:
                # -----------------------------
                # multiple book project
                # -----------------------------
                books = preprocessor.get_book_list()
                App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
                book_count = len(books)
                build_log_json['multiple'] = True
                build_log_json['build_logs'] = []
                for i, book in enumerate(books):
                    App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))
                    # Send job request to tx-manager
                    if i == 0:
                        book_job = job  # use the original job created above for the first book
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                    else:
                        book_job = job.clone()  # copy the original job for this book's job
                        book_job.job_id = self.get_unique_job_id()
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                        book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                        book_job.output = 'http://{0}.s3-{1}.amazonaws.com/{2}'.format(App.cdn_bucket, App.aws_region_name, book_job.cdn_file)
                        book_job.links = {
                            "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                            "rel": "self",
                            "method": "GET"
                        }
                        book_job.insert()

                    book_job.source = self.build_multipart_source(file_key, book)
                    book_job.update()
                    book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url, book_job,
                                                           pusher_username, repo_name, user_name)
                    if len(book) > 0:
                        part = str(i)
                        book_build_log['book'] = book
                        book_build_log['part'] = part
                    build_log_json['build_logs'].append(book_build_log)
                    self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                    self.send_request_to_converter(book_job, converter)
                    if linter:
                        extra_payload = {
                            'single_file': book,
                            's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                        }
                        self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

        remove_tree(self.base_temp_dir)  # cleanup
        return build_log_json
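The multipart identifier built above follows the convention job_id/part_count/part_index/book, which process_callback (further below) splits apart again; a minimal illustration with made-up values:

identifier = 'abc123/66/0/01-GEN.usfm'  # hypothetical
job_id, part_count, part_id, book = identifier.split('/')
assert (job_id, part_count, part_id, book) == ('abc123', '66', '0', '01-GEN.usfm')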
Example #17
    def generate_dashboard(self, max_failures=MAX_FAILURES):
        """
        Generate page with metrics indicating configuration of tx-manager.

        :param int max_failures:
        """
        App.logger.debug("Start: generateDashboard")

        dashboard = {
            'title': 'tX-Manager Dashboard',
            'body': 'No modules found'
        }

        items = sorted(TxModule().query(), key=lambda k: k.name)
        if items:
            module_names = []
            for item in items:
                module_names.append(item.name)

            App.logger.debug("Found: " + str(len(items)) + " item[s] in tx-module")
            App.logger.debug("Reading from Jobs table")

            registered_jobs = self.list_jobs({"convert_module": {"condition": "is_in", "value": module_names}}, False)
            total_job_count = TxJob.query().count()
            registered_job_count = registered_jobs.count()

            App.logger.debug("Finished reading from Jobs table")

            # sanity check since AWS can be slow to update job count reported in table (every 6 hours)
            if registered_job_count > total_job_count:
                total_job_count = registered_job_count

            body = BeautifulSoup('<h1>TX-Manager Dashboard - {0}</h1>'
                                 '<h2>Module Attributes</h2><br><table id="status"></table>'.format(datetime.now()),
                                 'html.parser')
            for item in items:
                module_name = item.name
                App.logger.debug(module_name)
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '"><td class="hdr" colspan="2">' + str(module_name) + '</td></tr>',
                    'html.parser'))

                self.get_jobs_counts_for_module(registered_jobs, module_name)

                # TBD: the following code almost walks the db record, replacing the next 11 lines
                # for attr, val in item:
                #    if (attr != 'name') and (len(attr) > 0):
                #       rec += '            <tr><td class="lbl">' + attr.replace("_", " ").title() + ':</td><td>' + "lst(val)" + "</td></tr>\n"
                # rec += '<tr><td colspan="2"></td></tr>'

                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-type" class="module-type"><td class="lbl">Type:</td><td>' +
                    str(item.type) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-input" class="module-input"><td class="lbl">Input Format:</td><td>' +
                    json.dumps(item.input_format) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-output" class="module-output">' +
                    '<td class="lbl">Output Format:</td><td>' +
                    json.dumps(item.output_format) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-resource" class="module-resource"><td class="lbl">Resource Types:</td>'
                    '<td>' + json.dumps(item.resource_types) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-version" class="module-version"><td class="lbl">Version:</td><td>' +
                    str(item.version) + '</td></tr>',
                    'html.parser'))

                if len(item.options) > 0:
                    body.table.append(BeautifulSoup(
                        '<tr id="' + module_name + '-options" class="module-options">' +
                        '<td class="lbl">Options:</td><td>' +
                        json.dumps(item.options) + '</td></tr>',
                        'html.parser'))

                if len(item.private_links) > 0:
                    body.table.append(BeautifulSoup(
                        '<tr id="' + module_name + '-private-links" class="module-private-links">' +
                        '<td class="lbl">Private Links:</td><td>' +
                        json.dumps(item.private_links) + '</td></tr>',
                        'html.parser'))

                if len(item.public_links) > 0:
                    body.table.append(BeautifulSoup(
                        '<tr id="' + module_name + '-public-links" class="module-public-links">' +
                        '<td class="lbl">Public Links:</td><td>' +
                        json.dumps(item.public_links) + '</td></tr>',
                        'html.parser'))

                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-job-success" class="module-public-links">' +
                    '<td class="lbl">Job Successes:</td><td>' +
                    str(self.jobs_success) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-job-warning" class="module-public-links">' +
                    '<td class="lbl">Job Warnings:</td><td>' +
                    str(self.jobs_warnings) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-job-failure" class="module-public-links">' +
                    '<td class="lbl">Job Failures:</td><td>' +
                    str(self.jobs_failures) + '</td></tr>',
                    'html.parser'))
                body.table.append(BeautifulSoup(
                    '<tr id="' + module_name + '-job-total" class="module-public-links">' +
                    '<td class="lbl">Jobs Total:</td><td>' +
                    str(self.jobs_total) + '</td></tr>',
                    'html.parser'))

            self.get_jobs_counts(registered_jobs)
            body.table.append(BeautifulSoup(
                '<tr id="totals"><td class="hdr" colspan="2">Total Jobs</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="totals-job-success" class="module-public-links"><td class="lbl">Success:</td><td>' +
                str(self.jobs_success) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="totals-job-warning" class="module-public-links"><td class="lbl">Warnings:</td><td>' +
                str(self.jobs_warnings) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="totals-job-failure" class="module-public-links"><td class="lbl">Failures:</td><td>' +
                str(self.jobs_failures) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="totals-job-unregistered" class="module-public-links"><td class="lbl">Unregistered:</td><td>' +
                str(total_job_count - self.jobs_total) + '</td></tr>',
                'html.parser'))
            body.table.append(BeautifulSoup(
                '<tr id="totals-job-total" class="module-public-links"><td class="lbl">Total:</td><td>' +
                str(total_job_count) + '</td></tr>',
                'html.parser'))

            # build job failures table
            job_failures = self.get_job_failures(registered_jobs, max_failures)
            body.append(BeautifulSoup('<h2>Failed Jobs</h2>', 'html.parser'))
            failure_table = BeautifulSoup('<table id="failed" cellpadding="4" border="1" ' +
                                          'style="border-collapse:collapse"></table>', 'html.parser')
            failure_table.table.append(BeautifulSoup('''
                <tr id="header">
                <th class="hdr">Time</th>
                <th class="hdr">Errors</th>
                <th class="hdr">Repo</th>
                <th class="hdr">PreConvert</th>
                <th class="hdr">Converted</th>
                <th class="hdr">Destination</th>''', 'html.parser'))

            gogs_url = App.gogs_url
            if gogs_url is None:
                gogs_url = 'https://git.door43.org'

            for i, item in enumerate(job_failures):

                try:
                    identifier = item.identifier
                    user_name, repo_name, commit_id = identifier.split('/')[:3]
                    source_sub_path = '{0}/{1}'.format(user_name, repo_name)
                    cdn_bucket = item.cdn_bucket
                    destination_url = 'https://{0}/u/{1}/{2}/{3}/build_log.json'.format(cdn_bucket, user_name,
                                                                                        repo_name, commit_id)
                    repo_url = gogs_url + "/" + source_sub_path
                    preconverted_url = item.source
                    converted_url = item.output
                    failure_table.table.append(BeautifulSoup(
                        '<tr id="failure-' + str(i) + '" class="module-job-id">'
                        + '<td>' + item.created_at.strftime("%Y-%m-%dT%H:%M:%SZ") + '</td>'
                        + '<td>' + ','.join(item.errors) + '</td>'
                        + '<td><a href="' + repo_url + '">' + source_sub_path + '</a></td>'
                        + '<td><a href="' + preconverted_url + '">' + preconverted_url.rsplit('/', 1)[1] + '</a></td>'
                        + '<td><a href="' + converted_url + '">' + item.job_id + '.zip</a></td>'
                        + '<td><a href="' + destination_url + '">Build Log</a></td>'
                        + '</tr>',
                        'html.parser'))
                except Exception:
                    pass  # skip failure rows with missing or malformed fields

            body.append(failure_table)
            self.build_language_popularity_tables(body, max_failures)
            body_html = body.prettify('UTF-8')
            dashboard['body'] = body_html

            # save to cdn in case HTTP connection times out
            try:
                self.temp_dir = tempfile.mkdtemp(suffix="", prefix="dashboard_")
                temp_file = os.path.join(self.temp_dir, "index.html")
                file_utils.write_file(temp_file, body_html)
                cdn_handler = App.cdn_s3_handler()
                cdn_handler.upload_file(temp_file, 'dashboard/index.html')
            except Exception as e:
                App.logger.debug("Could not save dashboard: " + str(e))
        else:
            App.logger.debug("No modules found.")

        App.db().close()
        return dashboard
    def process_callback(self):
        job_id_parts = self.identifier.split('/')
        job_id = job_id_parts[0]
        self.job = TxJob.get(job_id)

        if not self.job:
            error = 'No job found for job_id = {0}, identifier = {1}'.format(job_id, self.identifier)
            App.logger.error(error)
            raise Exception(error)

        if len(job_id_parts) == 4:
            part_count, part_id, book = job_id_parts[1:]
            App.logger.debug('Multiple project, part {0} of {1}, converting book {2}'.
                             format(part_id, part_count, book))
            multiple_project = True
        else:
            App.logger.debug('Single project')
            part_id = None
            multiple_project = False

        self.job.ended_at = datetime.utcnow()
        self.job.success = self.success
        for message in self.log:
            self.job.log_message(message)
        for message in self.warnings:
            self.job.warnings_message(message)
        for message in self.errors:
            self.job.error_message(message)
        if len(self.errors):
            self.job.log_message('{0} function returned with errors.'.format(self.job.convert_module))
        elif len(self.warnings):
            self.job.log_message('{0} function returned with warnings.'.format(self.job.convert_module))
        else:
            self.job.log_message('{0} function returned successfully.'.format(self.job.convert_module))

        if not self.success or len(self.job.errors):
            self.job.success = False
            self.job.status = "failed"
            message = "Conversion failed"
            App.logger.debug("Conversion failed, success: {0}, errors: {1}".format(self.success, self.job.errors))
        elif len(self.job.warnings) > 0:
            self.job.success = True
            self.job.status = "warnings"
            message = "Conversion successful with warnings"
        else:
            self.job.success = True
            self.job.status = "success"
            message = "Conversion successful"

        self.job.message = message
        self.job.log_message(message)
        self.job.log_message('Finished job {0} at {1}'.format(self.job.job_id, self.job.ended_at.strftime("%Y-%m-%dT%H:%M:%SZ")))

        s3_commit_key = 'u/{0}/{1}/{2}'.format(self.job.user_name, self.job.repo_name, self.job.commit_id)
        upload_key = s3_commit_key
        if multiple_project:
            upload_key += "/" + part_id

        App.logger.debug('Callback for commit {0}...'.format(s3_commit_key))

        # Download the ZIP file of the converted files
        converted_zip_url = self.job.output
        converted_zip_file = os.path.join(self.temp_dir, converted_zip_url.rpartition('/')[2])
        remove(converted_zip_file)  # make sure old file not present
        download_success = True
        App.logger.debug('Downloading converted zip file from {0}...'.format(converted_zip_url))
        try:
            download_file(converted_zip_url, converted_zip_file)
        except Exception:
            download_success = False  # if multiple project we note fail and move on
            if not multiple_project:
                remove_tree(self.temp_dir)  # cleanup
            if self.job.errors is None:
                self.job.errors = []
            self.job.errors.append("Missing converted file: " + converted_zip_url)
        finally:
            App.logger.debug('download finished, success={0}'.format(str(download_success)))

        self.job.update()

        if download_success:
            # Unzip the archive
            unzip_dir = self.unzip_converted_files(converted_zip_file)

            # Upload all files to the cdn_bucket with the key of <user>/<repo_name>/<commit> of the repo
            self.upload_converted_files(upload_key, unzip_dir)

        if multiple_project:
            # Now download the existing build_log.json file, update it and upload it back to S3 as convert_log
            build_log_json = self.update_convert_log(s3_commit_key, part_id + "/")

            # mark current part as finished
            self.cdn_upload_contents({}, s3_commit_key + '/' + part_id + '/finished')

        else:  # single part conversion
            # Now download the existing build_log.json file, update it and upload it back to S3 as convert_log
            build_log_json = self.update_convert_log(s3_commit_key)

            self.cdn_upload_contents({}, s3_commit_key + '/finished')  # flag finished

        results = ClientLinterCallback.deploy_if_conversion_finished(s3_commit_key, self.identifier)
        if results:
            self.all_parts_completed = True
            build_log_json = results

        remove_tree(self.temp_dir)  # cleanup
        return build_log_json
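For clarity, the 'finished' flags uploaded above follow this key layout; an illustrative helper, not part of the original code:

def finished_flag_key(s3_commit_key, part_id=None):
    # Single-part jobs flag <key>/finished; each part of a multi-book
    # job flags <key>/<part_id>/finished instead.
    if part_id is None:
        return s3_commit_key + '/finished'
    return '{0}/{1}/finished'.format(s3_commit_key, part_id)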
Example #20
    def test_get_converter_module(self):
        job = TxJob(**self.job_data)
        cw = ClientWebhook()
        converter = cw.get_converter_module(job)
        self.assertIsNotNone(converter)
        self.assertEqual(converter.name, 'md2html')
Example #21
    def test_get_linter_module(self):
        job = TxJob(**self.job_data)
        cw = ClientWebhook()
        linter = cw.get_linter_module(job)
        self.assertIsNotNone(linter)
        self.assertEqual(linter.name, 'obs')
    def process_webhook(self):
        # Check that we got commit data
        if not self.commit_data:
            raise Exception('No commit data from DCS was found in the Payload')

        # Check that the user token is valid
        if not App.gogs_user_token:
            raise Exception('DCS user token not given in Payload.')
        user = App.gogs_handler().get_user(App.gogs_user_token)
        if not user:
            raise Exception('Invalid DCS user token given in Payload')

        # Check that the URL to the DCS repo is valid
        if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
            raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

        # Check that commit is on repo's default branch, else quit
        try:
            commit_branch = self.commit_data['ref'].split('/')[2]
        except IndexError:
            raise Exception('Could not determine commit branch, exiting.')
        except KeyError:
            raise Exception('This does not appear to be a push, exiting.')
        if commit_branch != self.commit_data['repository']['default_branch']:
            raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

        # Get the commit_id, commit_url
        commit_id = self.commit_data['after']
        commit = None
        for commit in self.commit_data['commits']:
            if commit['id'] == commit_id:
                break
        commit_id = commit_id[:10]  # Only use the short form
        commit_url = commit['url']

        # Gather other details from the commit that we will note for the job(s)
        user_name = self.commit_data['repository']['owner']['username']
        repo_name = self.commit_data['repository']['name']
        compare_url = self.commit_data['compare_url']
        commit_message = commit['message']

        if 'pusher' in self.commit_data:
            pusher = self.commit_data['pusher']
        else:
            pusher = {'username': commit['author']['username']}
        pusher_username = pusher['username']

        # Download and unzip the repo files
        repo_dir = self.get_repo_files(commit_url, repo_name)

        # Get the resource container
        rc = RC(repo_dir, repo_name)

        # Save manifest to manifest table
        manifest_data = {
            'repo_name': repo_name,
            'user_name': user_name,
            'lang_code': rc.resource.language.identifier,
            'resource_id': rc.resource.identifier,
            'resource_type': rc.resource.type,
            'title': rc.resource.title,
            'manifest': json.dumps(rc.as_dict()),
            'last_updated': datetime.utcnow()
        }
        # First see if manifest already exists in DB and update it if it is
        tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
        if tx_manifest:
            for key, value in manifest_data.items():
                setattr(tx_manifest, key, value)
            App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
            tx_manifest.update()
        else:
            tx_manifest = TxManifest(**manifest_data)
            App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
            tx_manifest.insert()

        # Preprocess the files
        preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
        results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

        # Zip up the massaged files
        zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
        App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
        add_contents_to_zip(zip_filepath, preprocess_dir)
        App.logger.debug('finished.')

        # Upload zipped file to the S3 bucket
        file_key = self.upload_zip_file(commit_id, zip_filepath)

        job = TxJob()
        job.job_id = self.get_unique_job_id()
        job.identifier = job.job_id
        job.user_name = user_name
        job.repo_name = repo_name
        job.commit_id = commit_id
        job.manifests_id = tx_manifest.id
        job.created_at = datetime.utcnow()
        job.user = user.username  # Username of the token, not necessarily the repo's owner
        job.input_format = rc.resource.file_ext
        job.resource_type = rc.resource.identifier
        job.source = self.source_url_base + "/" + file_key
        job.cdn_bucket = App.cdn_bucket
        job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
        job.output = 'https://{0}/{1}'.format(App.cdn_bucket, job.cdn_file)
        job.callback = App.api_url + '/client/callback'
        job.output_format = 'html'
        job.links = {
            "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
            "rel": "self",
            "method": "GET"
        }
        job.success = False

        converter = self.get_converter_module(job)
        linter = self.get_linter_module(job)

        if converter:
            job.convert_module = converter.name
            job.started_at = datetime.utcnow()
            job.expires_at = job.started_at + timedelta(days=1)
            job.eta = job.started_at + timedelta(minutes=5)
            job.status = 'started'
            job.message = 'Conversion started...'
            job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
        else:
            job.error_message('No converter was found to convert {0} from {1} to {2}'.format(job.resource_type,
                                                                                             job.input_format,
                                                                                             job.output_format))
            job.message = 'No converter found'
            job.status = 'failed'

        if linter:
            job.lint_module = linter.name
        else:
            App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

        job.insert()

        # Get S3 bucket/dir ready
        s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
        self.clear_commit_directory_in_cdn(s3_commit_key)

        # Create a build log
        build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                               pusher_username, repo_name, user_name)
        # Upload an initial build_log
        self.upload_build_log_to_s3(build_log_json, s3_commit_key)

        # Update the project.json file
        self.update_project_json(commit_id, job, repo_name, user_name)

        # Convert and lint
        if converter:
            if not preprocessor.is_multiple_jobs():
                self.send_request_to_converter(job, converter)
                if linter:
                    extra_payload = {
                        's3_results_key': s3_commit_key
                    }
                    self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
            else:
                # -----------------------------
                # multiple book project
                # -----------------------------
                books = preprocessor.get_book_list()
                App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
                book_count = len(books)
                build_log_json['multiple'] = True
                build_log_json['build_logs'] = []
                for i, book in enumerate(books):
                    App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))
                    # Send job request to tx-manager
                    if i == 0:
                        book_job = job  # use the original job created above for the first book
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                    else:
                        book_job = job.clone()  # copy the original job for this book's job
                        book_job.job_id = self.get_unique_job_id()
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                        book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                        book_job.output = 'https://{0}/{1}'.format(App.cdn_bucket, book_job.cdn_file)
                        book_job.links = {
                            "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                            "rel": "self",
                            "method": "GET"
                        }
                        book_job.insert()

                    book_job.source = self.build_multipart_source(file_key, book)
                    book_job.update()
                    book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url, book_job,
                                                           pusher_username, repo_name, user_name)
                    if len(book) > 0:
                        part = str(i)
                        book_build_log['book'] = book
                        book_build_log['part'] = part
                    build_log_json['build_logs'].append(book_build_log)
                    self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                    self.send_request_to_converter(book_job, converter)
                    if linter:
                        extra_payload = {
                            'single_file': book,
                            's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                        }
                        self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

        remove_tree(self.base_temp_dir)  # cleanup
        return build_log_json
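This variant of process_webhook differs from the earlier listing only in how job.output is built: it points directly at the CDN bucket host (https://<bucket>/<file>) instead of a regioned s3 URL.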
Example #23
    def test_update_job(self):
        job = TxJob.get(self.items['job3']['job_id'])
        job.status = 'finished'
        job.update()
        job = TxJob.get(self.items['job3']['job_id'])
        self.assertEqual(job.status, 'finished')
Example #24
    def test_delete_job(self):
        job = TxJob.get(self.items['job1']['job_id'])
        self.assertIsNotNone(job)
        job.delete()
        job = TxJob.get(self.items['job1']['job_id'])
        self.assertIsNone(job)
Example #26
    def test_load_job(self):
        # Test loading an existing job by passing just its job_id to TxJob.get
        job = TxJob.get('job1')
        self.assertEqual(job.identifier, self.items['job1']['identifier'])