예제 #1
0
 def test_delete_manifest(self):
     """Delete the 'Door43/en_obs' fixture row and check no row remains for its keys."""
     fixture = self.items['Door43/en_obs']
     lookup = {
         'repo_name': fixture['repo_name'],
         'user_name': fixture['user_name'],
     }

     # The row must exist before we try to remove it
     record = TxManifest.get(**lookup)
     self.assertIsNotNone(record)

     record.delete()

     # Querying by the same keys must now come back empty
     remaining = TxManifest.query(**lookup).count()
     self.assertEqual(remaining, 0)
    def test_process_webhook_update_manifest_table(self, mock_download_file):
        """
        Preload the manifest table with a blank-field row, run the webhook, and
        check the manifest row stored for the webhook's repository.

        :param mock_download_file: download mock handed to setup_client_webhook_mock
        """
        # given: a row whose descriptive fields are single spaces
        blank = ' '
        placeholder = TxManifest(
            resource_id=blank,
            title=blank,
            manifest=blank,
            lang_code=blank,
            user_name='tx-manager-test-data',
            resource_type=blank,
            repo_name='en-ulb')
        placeholder.insert()
        hook = self.setup_client_webhook_mock('kpb_mat_text_udb_repo', mock_download_file)

        # when
        results = hook.process_webhook()

        # then: one job, no errors
        self.validateResults(results, 1, 0)

        # The manifest row for the pushed repository carries the repo's metadata
        repository = hook.commit_data['repository']
        stored = TxManifest.get(repo_name=repository['name'],
                                user_name=repository['owner']['username'])
        expected_fields = (
            ('repo_name', repository['name']),
            ('resource_id', 'udb'),
            ('lang_code', 'kpb'),
            ('title', 'Unlocked Dynamic Bible'),
            ('resource_type', 'book'),
        )
        for attr, expected in expected_fields:
            self.assertEqual(getattr(stored, attr), expected)
        self.assertGreater(len(stored.manifest), 100)
예제 #3
0
 def test_delete_manifest(self):
     """After delete(), a query on the same repo/user keys counts zero rows."""
     entry = self.items['Door43/en_obs']
     repo, owner = entry['repo_name'], entry['user_name']

     doomed = TxManifest.get(repo_name=repo, user_name=owner)
     self.assertIsNotNone(doomed)
     doomed.delete()

     leftover = TxManifest.query(repo_name=repo, user_name=owner)
     self.assertEqual(leftover.count(), 0)
예제 #4
0
 def init_table(self, view_count):
     """
     Configure App with an in-memory SQLite connection string and insert one
     manifest row for ViewCountTest's repo/user.

     :param view_count: value stored in the row's ``views`` field
     """
     App(db_connection_string='sqlite:///:memory:')
     fields = {
         'repo_name': ViewCountTest.REPO_NAME,
         'user_name': ViewCountTest.USER_NAME,
         'lang_code': 'lang',
         'resource_id': 'redID',
         'resource_type': 'resType',
         'title': 'title',
         'last_updated': datetime.utcnow(),
         'manifest': '{}',
         'views': view_count,
     }
     TxManifest(**fields).insert()
 def init_table(self, view_count):
     """
     Seed a fresh in-memory database with a single manifest record.

     :param view_count: initial ``views`` value for the inserted record
     """
     App(db_connection_string='sqlite:///:memory:')

     # Build the record from an ordered list of (column, value) pairs
     columns = [
         ('repo_name', ViewCountTest.REPO_NAME),
         ('user_name', ViewCountTest.USER_NAME),
         ('lang_code', 'lang'),
         ('resource_id', 'redID'),
         ('resource_type', 'resType'),
         ('title', 'title'),
         ('last_updated', datetime.utcnow()),
         ('manifest', '{}'),
         ('views', view_count),
     ]
     seed_row = TxManifest(**dict(columns))
     seed_row.insert()
예제 #6
0
 def test_insert_manifest(self):
     """Insert a manifest built from constructor fields and read it back by its keys."""
     repo, owner = 'Test_Repo1', 'Test_User1'
     new_manifest = TxManifest(
         repo_name=repo,
         user_name=owner,
         lang_code='es',
         resource_id='ta',
         resource_type='man',
         title='translationAcadamy',
         last_updated=datetime.utcnow())
     new_manifest.insert()

     stored = TxManifest.get(repo_name=repo, user_name=owner)
     self.assertEqual(stored.resource_id, 'ta')
예제 #7
0
 def test_load_manifest(self):
     """Fetch the 'Door43/en_obs' row by repo_name and user_name and compare its resource_id."""
     expected = self.items['Door43/en_obs']
     # Only the two key fields are supplied to the lookup
     keys = dict(repo_name=expected['repo_name'], user_name=expected['user_name'])
     loaded = TxManifest.get(**keys)
     self.assertEqual(loaded.resource_id, expected['resource_id'])
예제 #8
0
 def test_update_manifest(self):
     """Change fields on the 'francis/fr_ulb' row, call update(), and re-read to confirm."""
     fixture = self.items['francis/fr_ulb']
     key = dict(repo_name=fixture['repo_name'], user_name=fixture['user_name'])
     record = TxManifest.get(**key)

     # First round: change two text fields, then persist
     record.resource_id = 'udb'
     record.title = 'Unlocked Dynamic Bible'
     record.update()
     reloaded = TxManifest.get(**key)
     self.assertEqual(reloaded.title, record.title)

     # Second round: change the view counter, then persist again
     record.views = 5
     record.update()
     reloaded = TxManifest.get(**key)
     self.assertEqual(reloaded.views, 5)

     App.db_close()
예제 #9
0
 def test_insert_manifest(self):
     """A row inserted from keyword fields is retrievable with the same repo/user keys."""
     key_fields = {'repo_name': 'Test_Repo1', 'user_name': 'Test_User1'}
     other_fields = {
         'lang_code': 'es',
         'resource_id': 'ta',
         'resource_type': 'man',
         'title': 'translationAcadamy',
         'last_updated': datetime.utcnow(),
     }

     # All fields are passed to the constructor in one go
     all_fields = dict(key_fields)
     all_fields.update(other_fields)
     TxManifest(**all_fields).insert()

     fetched = TxManifest.get(**key_fields)
     self.assertEqual(fetched.resource_id, 'ta')
예제 #10
0
 def test_query_manifest(self):
     """An unfiltered query returns one row per fixture item, each with the fixture's resource_id."""
     all_rows = TxManifest.query()
     self.assertEqual(all_rows.count(), len(self.items))

     for row in all_rows:
         # Fixture items are keyed as '<user_name>/<repo_name>'
         fixture_key = '{0}/{1}'.format(row.user_name, row.repo_name)
         expected_id = self.items[fixture_key]['resource_id']
         self.assertEqual(row.resource_id, expected_id)
    def validate_conversion(self, user, repo, success, build_log_json, commit_id, commit_sha, commit_path,
                            expected_output_names, job, chapter_count=-1, file_ext=""):
        """
        Check the files and records left behind by a conversion run.

        Verifies, in order: the pre-convert zip, the converted files and their
        build log, the deployed files and their build log, the ``success`` flag,
        and finally that a manifest row exists for ``user``/``repo``.

        :param user: repo owner; used for the destination key and the manifest lookup
        :param repo: repo name; used for the destination key and the manifest lookup
        :param success: asserted true at the end; also passed to the pre-convert check
        :param build_log_json: must be non-empty; its 'errors' entry is reported as a warning
        :param commit_id: must have length COMMIT_LENGTH
        :param commit_sha: must not be None; used to build the S3 keys
        :param commit_path: must not be None
        :param expected_output_names: a list of names, or a single name that is wrapped in a list
        :param job: must not be None; its ``errors`` are reported as a warning
        :param chapter_count: forwarded to the file checks
        :param file_ext: forwarded to the pre-convert check
        """
        self.assertTrue(len(build_log_json) > 0)
        self.assertIsNotNone(job)
        self.temp_dir = tempfile.mkdtemp(prefix='testing_')

        # Accept a single name as well as a list (isinstance also accepts list subclasses)
        if not isinstance(expected_output_names, list):
            expected_output_names = [expected_output_names]

        # check pre-convert files
        self.download_and_check_zip_file(self.preconvert_handler, expected_output_names,
                                         self.preprocessor_output_extension, self.get_preconvert_s3_key(commit_sha),
                                         "preconvert", success, chapter_count, file_ext)

        # check converted files
        destination_key = self.get_destination_s3_key(commit_sha, repo, user)
        converted_build_log = self.check_destination_files(self.cdn_handler, expected_output_names, "html",
                                                           destination_key, chapter_count)

        # check required fields; assertIn names the missing key when it fails
        App.logger.debug(converted_build_log)
        saved_build_json = json.loads(converted_build_log)
        required_fields = ('commit_id', 'repo_owner', 'repo_name', 'created_at', 'source',
                           'errors', 'warnings', 'message', 'status')
        for field in required_fields:
            self.assertIn(field, saved_build_json)

        self.assertEqual(len(commit_id), COMMIT_LENGTH)
        self.assertIsNotNone(commit_sha)
        self.assertIsNotNone(commit_path)

        # Errors on the job or in the build log are recorded as warnings, not failures
        if len(job.errors) > 0:
            self.warn("WARNING: Found job errors: " + str(job.errors))

        if len(build_log_json['errors']) > 0:
            self.warn("WARNING: Found build_log errors: " + str(build_log_json['errors']))

        door43_handler = App.door43_s3_handler()
        deployed_build_log = self.check_deployed_files(door43_handler, expected_output_names, "html",
                                                       destination_key, chapter_count)

        self.compare_build_logs(converted_build_log, deployed_build_log, destination_key)

        if len(self.warnings):
            App.logger.debug("\n#######\nHave warnings:\n#######\n" + '\n'.join(self.warnings))

        self.assertTrue(success)

        # Test that repo is in manifest table (looked up by its repo_name/user_name keys)
        tx_manifest = TxManifest.get(repo_name=repo, user_name=user)
        self.assertIsNotNone(tx_manifest)
        self.assertEqual(tx_manifest.repo_name, repo)
        self.assertEqual(tx_manifest.user_name, user)
예제 #12
0
 def test_update_manifest(self):
     """Attribute changes followed by update() show up on a freshly fetched copy of the row."""
     entry = self.items['francis/fr_ulb']
     repo, owner = entry['repo_name'], entry['user_name']

     def fetch():
         # Re-read the row by its repo/user keys
         return TxManifest.get(repo_name=repo, user_name=owner)

     manifest = fetch()

     # Change resource_id and title, save, and compare the title
     manifest.resource_id = 'udb'
     manifest.title = 'Unlocked Dynamic Bible'
     manifest.update()
     self.assertEqual(fetch().title, manifest.title)

     # Change the view count, save, and compare it
     manifest.views = 5
     manifest.update()
     self.assertEqual(fetch().views, 5)

     App.db_close()
예제 #13
0
    def test_process_webhook_update_manifest_table(self, mock_download_file):
        """
        Run the webhook with a blank-field manifest row already in the table and
        verify the manifest row found for the webhook's repository.

        :param mock_download_file: download mock passed through to setup_client_webhook_mock
        """
        # given
        preload_fields = dict.fromkeys(
            ('resource_id', 'title', 'manifest', 'lang_code', 'resource_type'), ' ')
        preload_fields['user_name'] = 'tx-manager-test-data'
        preload_fields['repo_name'] = 'en-ulb'
        TxManifest(**preload_fields).insert()  # table starts with an empty-looking row

        webhook = self.setup_client_webhook_mock(
            'kpb_mat_text_udb_repo', mock_download_file)
        job_count, error_count = 1, 0

        # when
        outcome = webhook.process_webhook()

        # then
        self.validateResults(outcome, job_count, error_count)

        # Look the row up with the names taken from the webhook's commit data
        repo_info = webhook.commit_data['repository']
        row = TxManifest.get(repo_name=repo_info['name'],
                             user_name=repo_info['owner']['username'])
        self.assertEqual(row.repo_name, repo_info['name'])
        self.assertEqual(row.resource_id, 'udb')
        self.assertEqual(row.lang_code, 'kpb')
        self.assertEqual(row.title, 'Unlocked Dynamic Bible')
        self.assertEqual(row.resource_type, 'book')
        self.assertGreater(len(row.manifest), 100)
예제 #14
0
    def get_view_count(self, path, increment=0):
        """
        Get the page view count for a user repo page, optionally incrementing it.

        The URL path must start with ``/u/<repo_owner>/<repo_name>``.

        :param path: URL of the page whose view count is wanted
        :param increment: when truthy, the stored count is raised by one and saved
        :return: ``{'view_count': n}`` for a valid URL (``n`` is 0 when the repo has
                 no manifest row), otherwise ``{'ErrorMessage': ...}``
        """
        App.logger.debug("Start: get_view_count")

        response = {  # default to error
            'ErrorMessage': PageMetrics.INVALID_URL_ERROR + path
        }

        parsed = urlparse.urlparse(path)
        try:
            empty, u, repo_owner, repo_name = parsed.path.split('/')[0:4]
        except ValueError:
            # Fewer than four path segments: the unpack fails, so the URL is not a repo page
            App.logger.warning("Invalid repo url: " + path)
            return response

        if (empty != '') or (u != 'u'):
            App.logger.warning("Invalid repo url: " + path)
            return response

        del response['ErrorMessage']

        App.logger.debug("Valid repo url: " + path)
        # First see record already exists in DB
        tx_manifest = TxManifest.get(repo_name=repo_name, user_name=repo_owner)
        if tx_manifest:
            if increment:
                # The count always goes up by exactly one, whatever the value of increment
                tx_manifest.views += 1
                App.logger.debug('Incrementing view count to {0}'.format(
                    tx_manifest.views))
                tx_manifest.update()
            else:
                App.logger.debug('Returning stored view count of {0}'.format(
                    tx_manifest.views))
            view_count = tx_manifest.views
        else:  # record is not present
            App.logger.debug('No entries for page in manifest table')
            view_count = 0
        response['view_count'] = view_count
        return response
예제 #15
0
    def get_view_count(self, path, increment=0):
        """
        Return the stored view count for a repo page, bumping it first if asked.

        A valid URL has a path of the form ``/u/<repo_owner>/<repo_name>[/...]``.

        :param path: URL of the repo page
        :param increment: any truthy value adds one to the stored count and saves it
        :return: dict with 'view_count' on success (0 if no manifest row exists),
                 or with 'ErrorMessage' when the URL is not a valid repo URL
        """
        App.logger.debug("Start: get_view_count")

        response = {  # default to error
            'ErrorMessage': PageMetrics.INVALID_URL_ERROR + path
        }

        parsed = urlparse.urlparse(path)
        try:
            empty, u, repo_owner, repo_name = parsed.path.split('/')[0:4]
        except ValueError:
            # Raised by the unpack when the path has fewer than four segments
            App.logger.warning("Invalid repo url: " + path)
            return response

        if (empty != '') or (u != 'u'):
            App.logger.warning("Invalid repo url: " + path)
            return response

        del response['ErrorMessage']

        App.logger.debug("Valid repo url: " + path)
        # First see record already exists in DB
        tx_manifest = TxManifest.get(repo_name=repo_name, user_name=repo_owner)
        if tx_manifest:
            if increment:
                # Step is fixed at one; increment only acts as an on/off flag
                tx_manifest.views += 1
                App.logger.debug('Incrementing view count to {0}'.format(tx_manifest.views))
                tx_manifest.update()
            else:
                App.logger.debug('Returning stored view count of {0}'.format(tx_manifest.views))
            view_count = tx_manifest.views
        else:  # record is not present
            App.logger.debug('No entries for page in manifest table')
            view_count = 0
        response['view_count'] = view_count
        return response
    def test_process_webhook(self, mock_download_file):
        """
        Run the webhook for the kpb UDB test repo and check the manifest row it
        stored, including the link from the created job to that row.

        :param mock_download_file: download mock handed to setup_client_webhook_mock
        """
        # given
        hook = self.setup_client_webhook_mock('kpb_mat_text_udb_repo', mock_download_file)

        # when
        results = hook.process_webhook()

        # then: one job, no errors
        self.validateResults(results, 1, 0)

        # The repo named in the commit data must now have a manifest row
        repository = hook.commit_data['repository']
        stored_manifest = TxManifest.get(repo_name=repository['name'],
                                         user_name=repository['owner']['username'])
        stored_job = TxJob.get(results['job_id'])

        self.assertEqual(stored_manifest.repo_name, repository['name'])
        self.assertEqual(stored_manifest.resource_id, 'udb')
        self.assertEqual(stored_manifest.lang_code, 'kpb')
        # The job points back at the manifest row
        self.assertEqual(stored_manifest.id, stored_job.manifests_id)
 def test_apply_filters(self):
     """Apply each supported filter to an unfiltered query and check how many rows match."""
     query = TxManifest.query()
     search = ProjectSearch()
     search.url_params = ""

     def matches(filter_key, filter_value):
         # Number of rows left after applying a single filter to the base query
         return search.apply_filters(query, filter_key, filter_value).count()

     # (filter key, filter value, expected match count), in the order they are applied
     leading_cases = [
         ('minViews', 10, 1),
         ('daysForRecent', 30, 1),
         ('repo_name', 'obs', 2),
         ('user_name', 'john', 1),
         ('title', 'Open', 2),
         ('full_text', '%great%', 1),
     ]
     for filter_key, filter_value, expected in leading_cases:
         self.assertEqual(matches(filter_key, filter_value), expected)

     # The 'time' filter is given the current year and only needs at least one match
     current_year = str(datetime.utcnow().year)
     self.assertGreaterEqual(matches('time', current_year), 1)

     trailing_cases = [
         ('resID', 'obs', 2),
         ('resType', 'bundle', 1),
         ('languages', "[fr,es]", 2),
     ]
     for filter_key, filter_value, expected in trailing_cases:
         self.assertEqual(matches(filter_key, filter_value), expected)
예제 #18
0
    def test_process_webhook(self, mock_download_file):
        """
        Process the mocked kpb UDB webhook and confirm a manifest row was stored
        for its repository and that the resulting job references that row.

        :param mock_download_file: download mock passed through to setup_client_webhook_mock
        """
        # given
        webhook = self.setup_client_webhook_mock(
            'kpb_mat_text_udb_repo', mock_download_file)
        job_count, error_count = 1, 0

        # when
        outcome = webhook.process_webhook()

        # then
        self.validateResults(outcome, job_count, error_count)

        # Fetch the manifest row and the job created by the webhook
        repo_info = webhook.commit_data['repository']
        manifest_row = TxManifest.get(repo_name=repo_info['name'],
                                      user_name=repo_info['owner']['username'])
        job_row = TxJob.get(outcome['job_id'])

        checks = [
            (manifest_row.repo_name, repo_info['name']),
            (manifest_row.resource_id, 'udb'),
            (manifest_row.lang_code, 'kpb'),
            (manifest_row.id, job_row.manifests_id),
        ]
        for actual, expected in checks:
            self.assertEqual(actual, expected)
예제 #19
0
 def test_query_manifest(self):
     """Query every manifest and match each row's resource_id against its fixture item."""
     rows = TxManifest.query()
     self.assertEqual(rows.count(), len(self.items))

     key_template = '{0}/{1}'  # fixture keys look like '<user_name>/<repo_name>'
     for manifest in rows:
         fixture = self.items[key_template.format(manifest.user_name, manifest.repo_name)]
         self.assertEqual(manifest.resource_id, fixture['resource_id'])
예제 #20
0
    def process_webhook(self):
        """
        Handle a DCS push webhook: validate the payload, store the repo's manifest,
        preprocess and upload its files, create the conversion job(s), and kick off
        conversion and linting.

        Reads the payload from ``self.commit_data`` and works inside
        ``self.base_temp_dir``, which is removed before returning.

        :return: the build log dict created for the commit; for a multi-book project
                 it also carries 'multiple' and a 'build_logs' list with one entry per book
        :raises Exception: when commit data is missing, the user token is missing or
                 invalid, the repo URL does not start with App.gogs_url, the payload is
                 not a push (no 'ref'), the branch cannot be determined, or the commit
                 is not on the repo's default branch
        """
        # Check that we got commit data
        if not self.commit_data:
            raise Exception('No commit data from DCS was found in the Payload')

        # Check that the user token is valid
        if not App.gogs_user_token:
            raise Exception('DCS user token not given in Payload.')
        user = App.gogs_handler().get_user(App.gogs_user_token)
        if not user:
            raise Exception('Invalid DCS user token given in Payload')

        # Check that the URL to the DCS repo is valid
        if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
            raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

        # Check that commit is on repo's default branch, else quit
        try:
            commit_branch = self.commit_data['ref'].split('/')[2]
        except IndexError:
            raise Exception('Could not determine commit branch, exiting.')
        except KeyError:
            # Without this raise, execution fell through with commit_branch unbound
            raise Exception('This does not appear to be a push, exiting.')
        if commit_branch != self.commit_data['repository']['default_branch']:
            raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

        # Get the commit_id, commit_url
        commit_id = self.commit_data['after']
        commit = None
        # NOTE(review): if no commit id matches 'after', the last commit in the list is
        # used; with an empty list commit stays None and the lookup below fails - confirm intent
        for commit in self.commit_data['commits']:
            if commit['id'] == commit_id:
                break
        commit_id = commit_id[:10]  # Only use the short form
        commit_url = commit['url']

        # Gather other details from the commit that we will note for the job(s)
        user_name = self.commit_data['repository']['owner']['username']
        repo_name = self.commit_data['repository']['name']
        compare_url = self.commit_data['compare_url']
        commit_message = commit['message']

        # Fall back to the commit author when the payload has no pusher
        if 'pusher' in self.commit_data:
            pusher = self.commit_data['pusher']
        else:
            pusher = {'username': commit['author']['username']}
        pusher_username = pusher['username']

        # Download and unzip the repo files
        repo_dir = self.get_repo_files(commit_url, repo_name)

        # Get the resource container
        rc = RC(repo_dir, repo_name)

        # Save manifest to manifest table
        manifest_data = {
            'repo_name': repo_name,
            'user_name': user_name,
            'lang_code': rc.resource.language.identifier,
            'resource_id': rc.resource.identifier,
            'resource_type': rc.resource.type,
            'title': rc.resource.title,
            'manifest': json.dumps(rc.as_dict()),
            'last_updated': datetime.utcnow()
        }
        # First see if manifest already exists in DB and update it if it is
        tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
        if tx_manifest:
            for key, value in manifest_data.iteritems():
                setattr(tx_manifest, key, value)
            App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
            tx_manifest.update()
        else:
            tx_manifest = TxManifest(**manifest_data)
            App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
            tx_manifest.insert()

        # Preprocess the files
        preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
        results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

        # Zip up the massaged files
        # NOTE(review): tempfile.mktemp is deprecated as race-prone; kept because how
        # add_contents_to_zip treats a pre-existing file is not visible here
        zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
        App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
        add_contents_to_zip(zip_filepath, preprocess_dir)
        App.logger.debug('finished.')

        # Upload zipped file to the S3 bucket
        file_key = self.upload_zip_file(commit_id, zip_filepath)

        job = TxJob()
        job.job_id = self.get_unique_job_id()
        job.identifier = job.job_id
        job.user_name = user_name
        job.repo_name = repo_name
        job.commit_id = commit_id
        job.manifests_id = tx_manifest.id
        job.created_at = datetime.utcnow()
        job.user = user.username  # Username of the token, not necessarily the repo's owner
        job.input_format = rc.resource.file_ext
        job.resource_type = rc.resource.identifier
        job.source = self.source_url_base + "/" + file_key
        job.cdn_bucket = App.cdn_bucket
        job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
        job.output = 'https://{0}/{1}'.format(App.cdn_bucket, job.cdn_file)
        job.callback = App.api_url + '/client/callback'
        job.output_format = 'html'
        job.links = {
            "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
            "rel": "self",
            "method": "GET"
        }
        job.success = False

        converter = self.get_converter_module(job)
        linter = self.get_linter_module(job)

        if converter:
            job.convert_module = converter.name
            job.started_at = datetime.utcnow()
            job.expires_at = job.started_at + timedelta(days=1)
            job.eta = job.started_at + timedelta(minutes=5)
            job.status = 'started'
            job.message = 'Conversion started...'
            job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
        else:
            job.error_message('No converter was found to convert {0} from {1} to {2}'.format(job.resource_type,
                                                                                             job.input_format,
                                                                                             job.output_format))
            job.message = 'No converter found'
            job.status = 'failed'

        if linter:
            job.lint_module = linter.name
        else:
            App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

        # The job is stored even when no converter was found (status 'failed')
        job.insert()

        # Get S3 bucket/dir ready
        s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
        self.clear_commit_directory_in_cdn(s3_commit_key)

        # Create a build log
        build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                               pusher_username, repo_name, user_name)
        # Upload an initial build_log
        self.upload_build_log_to_s3(build_log_json, s3_commit_key)

        # Update the project.json file
        self.update_project_json(commit_id, job, repo_name, user_name)

        # Convert and lint
        if converter:
            if not preprocessor.is_multiple_jobs():
                self.send_request_to_converter(job, converter)
                if linter:
                    extra_payload = {
                        's3_results_key': s3_commit_key
                    }
                    self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
            else:
                # -----------------------------
                # multiple book project
                # -----------------------------
                books = preprocessor.get_book_list()
                App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
                book_count = len(books)
                build_log_json['multiple'] = True
                build_log_json['build_logs'] = []
                for i, book in enumerate(books):
                    App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))
                    # Send job request to tx-manager
                    if i == 0:
                        book_job = job  # use the original job created above for the first book
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                    else:
                        book_job = job.clone()  # copy the original job for this book's job
                        book_job.job_id = self.get_unique_job_id()
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                        book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                        book_job.output = 'https://{0}/{1}'.format(App.cdn_bucket, book_job.cdn_file)
                        book_job.links = {
                            "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                            "rel": "self",
                            "method": "GET"
                        }
                        book_job.insert()

                    # Each book job gets its own source built from the shared zip and the book
                    book_job.source = self.build_multipart_source(file_key, book)
                    book_job.update()
                    book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url, book_job,
                                                           pusher_username, repo_name, user_name)
                    if len(book) > 0:
                        part = str(i)
                        book_build_log['book'] = book
                        book_build_log['part'] = part
                    build_log_json['build_logs'].append(book_build_log)
                    self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                    self.send_request_to_converter(book_job, converter)
                    if linter:
                        extra_payload = {
                            'single_file': book,
                            's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                        }
                        self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

        remove_tree(self.base_temp_dir)  # cleanup
        return build_log_json
 def populate_table(self):
     """Insert one manifest row for every entry in self.items."""
     for fields in self.items.values():
         # Each entry's mapping supplies the constructor's keyword arguments
         TxManifest(**fields).insert()
예제 #22
0
    def process_webhook(self):
        # Check that we got commit data
        if not self.commit_data:
            raise Exception('No commit data from DCS was found in the Payload')

        # Check that the user token is valid
        if not App.gogs_user_token:
            raise Exception('DCS user token not given in Payload.')
        user = App.gogs_handler().get_user(App.gogs_user_token)
        if not user:
            raise Exception('Invalid DCS user token given in Payload')

        # Check that the URL to the DCS repo is valid
        if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
            raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

        # Check that commit is on repo's default branch, else quit
        try:
            commit_branch = self.commit_data['ref'].split('/')[2]
        except IndexError:
            raise Exception('Could not determine commit branch, exiting.')
        except KeyError:
            Exception('This does not appear to be a push, exiting.')
        if commit_branch != self.commit_data['repository']['default_branch']:
            raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

        # Get the commit_id, commit_url
        commit_id = self.commit_data['after']
        commit = None
        for commit in self.commit_data['commits']:
            if commit['id'] == commit_id:
                break
        commit_id = commit_id[:10]  # Only use the short form
        commit_url = commit['url']


        # Gather other details from the commit that we will note for the job(s)
        user_name = self.commit_data['repository']['owner']['username']
        repo_name = self.commit_data['repository']['name']
        compare_url = self.commit_data['compare_url']
        commit_message = commit['message']

        if 'pusher' in self.commit_data:
            pusher = self.commit_data['pusher']
        else:
            pusher = {'username': commit['author']['username']}
        pusher_username = pusher['username']

        # Download and unzip the repo files
        repo_dir = self.get_repo_files(commit_url, repo_name)

        # Get the resource container
        rc = RC(repo_dir, repo_name)

        # Save manifest to manifest table
        manifest_data = {
            'repo_name': repo_name,
            'user_name': user_name,
            'lang_code': rc.resource.language.identifier,
            'resource_id': rc.resource.identifier,
            'resource_type': rc.resource.type,
            'title': rc.resource.title,
            'manifest': json.dumps(rc.as_dict()),
            'last_updated': datetime.utcnow()
        }
        # First see if manifest already exists in DB and update it if it is
        tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
        if tx_manifest:
            for key, value in manifest_data.iteritems():
                setattr(tx_manifest, key, value)
            App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
            tx_manifest.update()
        else:
            tx_manifest = TxManifest(**manifest_data)
            App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
            tx_manifest.insert()

        # Preprocess the files
        preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
        results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

        # Zip up the massaged files
        zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
        App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
        add_contents_to_zip(zip_filepath, preprocess_dir)
        App.logger.debug('finished.')

        # Upload zipped file to the S3 bucket
        file_key = self.upload_zip_file(commit_id, zip_filepath)

        job = TxJob()
        job.job_id = self.get_unique_job_id()
        job.identifier = job.job_id
        job.user_name = user_name
        job.repo_name = repo_name
        job.commit_id = commit_id
        job.manifests_id = tx_manifest.id
        job.created_at = datetime.utcnow()
        job.user = user.username  # Username of the token, not necessarily the repo's owner
        job.input_format = rc.resource.file_ext
        job.resource_type = rc.resource.identifier
        job.source = self.source_url_base + "/" + file_key
        job.cdn_bucket = App.cdn_bucket
        job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
        job.output = 'http://{0}.s3-{1}.amazonaws.com/{2}'.format(App.cdn_bucket, App.aws_region_name, job.cdn_file)
        job.callback = App.api_url + '/client/callback'
        job.output_format = 'html'
        job.links = {
            "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
            "rel": "self",
            "method": "GET"
        }
        job.success = False

        converter = self.get_converter_module(job)
        linter = self.get_linter_module(job)

        if converter:
            job.convert_module = converter.name
            job.started_at = datetime.utcnow()
            job.expires_at = job.started_at + timedelta(days=1)
            job.eta = job.started_at + timedelta(minutes=5)
            job.status = 'started'
            job.message = 'Conversion started...'
            job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
        else:
            job.error_message('No converter was found to convert {0} from {1} to {2}'.format(job.resource_type,
                                                                                             job.input_format,
                                                                                             job.output_format))
            job.message = 'No converter found'
            job.status = 'failed'

        if linter:
            job.lint_module = linter.name
        else:
            App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

        job.insert()

        # Get S3 bucket/dir ready
        s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
        self.clear_commit_directory_in_cdn(s3_commit_key)

        # Create a build log
        build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                               pusher_username, repo_name, user_name)
        # Upload an initial build_log
        self.upload_build_log_to_s3(build_log_json, s3_commit_key)

        # Update the project.json file
        self.update_project_json(commit_id, job, repo_name, user_name)

        # Convert and lint
        if converter:
            if not preprocessor.is_multiple_jobs():
                self.send_request_to_converter(job, converter)
                if linter:
                    extra_payload = {
                        's3_results_key': s3_commit_key
                    }
                    self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
            else:
                # -----------------------------
                # multiple book project
                # -----------------------------
                books = preprocessor.get_book_list()
                App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
                book_count = len(books)
                build_log_json['multiple'] = True
                build_log_json['build_logs'] = []
                for i in range(0, len(books)):
                    book = books[i]
                    App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))
                    # Send job request to tx-manager
                    if i == 0:
                        book_job = job  # use the original job created above for the first book
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                    else:
                        book_job = job.clone()  # copy the original job for this book's job
                        book_job.job_id = self.get_unique_job_id()
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                        book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                        book_job.output = 'http://{0}.s3-{1}.amazonaws.com/{2}'.format(App.cdn_bucket, App.aws_region_name, book_job.cdn_file)
                        book_job.links = {
                            "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                            "rel": "self",
                            "method": "GET"
                        }
                        book_job.insert()

                    book_job.source = self.build_multipart_source(file_key, book)
                    book_job.update()
                    book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url, book_job,
                                                           pusher_username, repo_name, user_name)
                    if len(book) > 0:
                        part = str(i)
                        book_build_log['book'] = book
                        book_build_log['part'] = part
                    build_log_json['build_logs'].append(book_build_log)
                    self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                    self.send_request_to_converter(book_job, converter)
                    if linter:
                        extra_payload = {
                            'single_file': book,
                            's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                        }
                        self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

        remove_tree(self.base_temp_dir)  # cleanup
        return build_log_json
예제 #23
0
 def test_manifest_last_modified_not_auto_updating(self):
     """
     Verify that last_updated only changes when it is assigned explicitly.

     A manifest is inserted with a fixed last_updated. Updating an unrelated column (views)
     is expected to leave the stored timestamp equal to the original, while assigning a new
     last_updated and updating is expected to make the stored timestamp differ from it.
     """
     timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
     sometime = datetime.strptime('2017-02-11T15:43:11Z', timestamp_format)
     manifest_path = os.path.join(self.resources_dir, 'obs_manifest.yaml')

     manifest = TxManifest(
         repo_name='es_ulb',
         user_name='franco',
         lang_code='es',
         resource_id='ulb',
         resource_type='bundle',
         title='Unlocked Literal Bible',
         views=12,
         last_updated=sometime,
         manifest=read_file(manifest_path)
     )
     manifest.insert()
     stored = TxManifest.get(manifest.id)
     self.assertEqual(stored.last_updated, sometime)

     # Changing only the view counter must not alter the stored timestamp
     manifest.views += 1
     manifest.update()
     stored = TxManifest.get(manifest.id)
     self.assertEqual(stored.last_updated, sometime)

     # An explicitly assigned timestamp must replace the original one
     manifest.last_updated = datetime.strptime('2018-03-12T15:43:11Z', timestamp_format)
     manifest.update()
     stored = TxManifest.get(manifest.id)
     self.assertNotEqual(stored.last_updated, sometime)
예제 #24
0
    def search_projects(self, criterion):
        """
        Search the manifest table for repos that match criterion.

        A JSON round-trip copy of criterion is kept in self.criterion. Its 'languages' entry (when
        present) is removed from the copy and applied first; every remaining entry is then handed
        to self.apply_filters. 'sort_by' / 'sort_by_reversed' add an ordering when they name an
        attribute of TxManifest, 'matchLimit' caps the number of rows (100 when absent) and
        'returnedFields' is a comma separated list of the fields to copy into each result item.

        :param criterion: JSON-serializable search criteria
        :return: list of dicts, one per matching row (empty when nothing matched), or None when
                 apply_filters returned None or building the query raised an exception
        """
        App.logger.debug("Start: search_repos: " + json.dumps(criterion))

        # Round-trip through JSON so we get a private copy that is safe to modify
        self.criterion = json.loads(json.dumps(criterion))

        try:
            query = TxManifest.query()

            self.url_params = ""

            # The languages filter is applied ahead of all the others
            if 'languages' in self.criterion:
                languages = self.criterion['languages']
                del self.criterion['languages']
                query = self.apply_filters(query, 'languages', languages)
                if query is None:
                    return None

            # Everything that is left in the criteria goes through the same filter hook
            for name in self.criterion:
                query = self.apply_filters(query, name, self.criterion[name])
                if query is None:
                    return None

            # Drop a leading separator before prefixing the query-string marker
            if self.url_params.startswith('&'):
                self.url_params = self.url_params[1:]
            self.url_params = '?' + self.url_params

            # Ascending first, then descending, each only if the named attribute exists
            for sort_key, descending in (('sort_by', False), ('sort_by_reversed', True)):
                if sort_key not in self.criterion:
                    continue
                db_key = getattr(TxManifest, self.criterion[sort_key], None)
                if db_key:
                    query = query.order_by(db_key.desc() if descending else db_key)

        except Exception as e:
            self.log_error('Failed to create a query: ' + str(e))
            return None

        if 'matchLimit' in self.criterion:
            limit = self.criterion['matchLimit']
        else:
            limit = 100
        results = query.limit(limit).all()  # get all matching

        data = []
        if not results:  # record is not present
            App.logger.debug('No entries found in search')
        else:
            App.logger.debug('Returning search result count of {0}'.format(len(results)))

            if "returnedFields" in self.criterion:
                field_spec = self.criterion["returnedFields"]
            else:
                field_spec = "repo_name, user_name, title, lang_code, manifest, last_updated, views"
            # Requests use the short names; the table rows use the long ones
            field_spec = field_spec.replace('resID', 'resource_id').replace('resType', 'resource_type')
            wanted_fields = [field.strip() for field in field_spec.split(',')]

            # Results are reported back under the short names again
            output_names = {'resource_id': 'resID', 'resource_type': 'resType'}

            # copy wanted fields from each result item
            for result in results:
                item = {}
                for field in wanted_fields:
                    if not hasattr(result, field):
                        continue
                    value = getattr(result, field)
                    if isinstance(value, datetime.datetime):
                        value = str(value)
                    item[output_names.get(field, field)] = value
                data.append(item)

        App.db_close()

        self.save_url_search()
        return data
예제 #25
0
 def populate_table(self):
     """Insert one TxManifest row for every entry held in self.items."""
     for manifest_fields in self.items.values():
         TxManifest(**manifest_fields).insert()
예제 #26
0
 def test_manifest_last_modified_not_auto_updating(self):
     """
     Check that last_updated is stored as given and is not changed by unrelated updates.

     The stored value must equal the initial timestamp after the insert and after an update
     that only changes views; it must differ once a new last_updated has been assigned.
     """
     def parse(stamp):
         return datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%SZ')

     def stored_last_updated():
         # Fetch the manifest again by id and report the timestamp it carries
         return TxManifest.get(manifest.id).last_updated

     sometime = parse('2017-02-11T15:43:11Z')
     fields = {
         'repo_name': 'es_ulb',
         'user_name': 'franco',
         'lang_code': 'es',
         'resource_id': 'ulb',
         'resource_type': 'bundle',
         'title': 'Unlocked Literal Bible',
         'views': 12,
         'last_updated': sometime,
         'manifest': read_file(os.path.join(self.resources_dir, 'obs_manifest.yaml')),
     }
     manifest = TxManifest(**fields)
     manifest.insert()
     self.assertEqual(stored_last_updated(), sometime)

     # Update an unrelated column only
     manifest.views = 1 + manifest.views
     manifest.update()
     self.assertEqual(stored_last_updated(), sometime)

     # Now set the timestamp explicitly
     manifest.last_updated = parse('2018-03-12T15:43:11Z')
     manifest.update()
     self.assertNotEqual(stored_last_updated(), sometime)
예제 #27
0
 def test_load_manifest(self):
     """Fetch the 'Door43/en_obs' fixture by repo_name and user_name and compare its resource_id."""
     expected = self.items['Door43/en_obs']
     # Look the manifest up using nothing but the repo_name and user_name of the fixture
     lookup = {
         'repo_name': expected['repo_name'],
         'user_name': expected['user_name'],
     }
     loaded = TxManifest.get(**lookup)
     self.assertEqual(loaded.resource_id, expected['resource_id'])
예제 #28
0
    def validate_conversion(self,
                            user,
                            repo,
                            success,
                            build_log_json,
                            commit_id,
                            commit_sha,
                            commit_path,
                            expected_output_names,
                            job,
                            chapter_count=-1,
                            file_ext=""):
        """
        Assert that a conversion produced the expected pre-convert, converted and deployed output.

        :param user: repo owner; used for the destination S3 key and the manifest lookup
        :param repo: repo name; used for the destination S3 key and the manifest lookup
        :param success: conversion outcome; passed to the pre-convert check and asserted true
        :param build_log_json: build log of the conversion; must be non-empty and have an 'errors' entry
        :param commit_id: commit id whose length must equal COMMIT_LENGTH
        :param commit_sha: commit sha used to build the pre-convert and destination S3 keys
        :param commit_path: only checked for not being None
        :param expected_output_names: a single output name or a list of output names
        :param job: job object; must not be None, its errors are reported as warnings
        :param chapter_count: passed through to the file check helpers
        :param file_ext: passed through to the pre-convert zip check
        """
        self.assertTrue(len(build_log_json) > 0)
        self.assertIsNotNone(job)
        self.temp_dir = tempfile.mkdtemp(prefix='testing_')

        # Accept a bare name as well as a list of names
        if not (type(expected_output_names) is list):
            expected_output_names = [expected_output_names
                                     ]  # put string in list

        # check pre-convert files
        self.download_and_check_zip_file(
            self.preconvert_handler, expected_output_names,
            self.preprocessor_output_extension,
            self.get_preconvert_s3_key(commit_sha), "preconvert", success,
            chapter_count, file_ext)

        # check converted files
        destination_key = self.get_destination_s3_key(commit_sha, repo, user)
        converted_build_log = self.check_destination_files(
            self.cdn_handler, expected_output_names, "html", destination_key,
            chapter_count)

        # check required fields
        App.logger.debug(converted_build_log)
        saved_build_json = json.loads(converted_build_log)
        self.assertTrue('commit_id' in saved_build_json)
        self.assertTrue('repo_owner' in saved_build_json)
        self.assertTrue('repo_name' in saved_build_json)
        self.assertTrue('created_at' in saved_build_json)
        self.assertTrue('source' in saved_build_json)
        self.assertTrue('errors' in saved_build_json)
        self.assertTrue('warnings' in saved_build_json)
        self.assertTrue('message' in saved_build_json)
        self.assertTrue('status' in saved_build_json)

        self.assertEqual(len(commit_id), COMMIT_LENGTH)
        self.assertIsNotNone(commit_sha)
        self.assertIsNotNone(commit_path)
        # Errors are passed to self.warn() here rather than asserted on
        if len(job.errors) > 0:
            self.warn("WARNING: Found job errors: " + str(job.errors))

        if len(build_log_json['errors']) > 0:
            self.warn("WARNING: Found build_log errors: " +
                      str(build_log_json['errors']))

        # Check the deployed copy and compare its build log with the converted one
        door43_handler = App.door43_s3_handler()
        deployed_build_log = self.check_deployed_files(door43_handler,
                                                       expected_output_names,
                                                       "html", destination_key,
                                                       chapter_count)

        self.compare_build_logs(converted_build_log, deployed_build_log,
                                destination_key)

        # Log everything found in self.warnings in one debug message
        if len(self.warnings):
            App.logger.debug("\n#######\nHave warnings:\n#######\n" +
                             '\n'.join(self.warnings))

        self.assertTrue(success)

        # Test that repo is in manifest table
        tx_manifest = TxManifest.get(repo_name=repo, user_name=user)
        # Giving TxManifest above just the composite keys will cause it to load all the data from the App.
        self.assertIsNotNone(tx_manifest)
        self.assertEqual(tx_manifest.repo_name, repo)
        self.assertEqual(tx_manifest.user_name, user)