Example #1
    def run(self):
        """
        Call the converters
        """
        success = False
        try:
            if not self.input_zip_file or not os.path.exists(
                    self.input_zip_file):
                # No input zip file yet, so we need to download the archive
                self.download_archive()
            # unzip the input archive
            App.logger.debug("Unzipping {0} to {1}".format(
                self.input_zip_file, self.files_dir))
            unzip(self.input_zip_file, self.files_dir)
            # convert method called
            App.logger.debug("Converting files...")
            if self.convert():
                App.logger.debug("Was able to convert {0}".format(
                    self.resource))
                # zip the output dir to the output archive
                App.logger.debug("Adding files in {0} to {1}".format(
                    self.output_dir, self.output_zip_file))
                add_contents_to_zip(self.output_zip_file, self.output_dir)
                remove_tree(self.output_dir)
                # upload the output archive either to cdn_bucket or to a file (no cdn_bucket)
                App.logger.debug("Uploading archive to {0}/{1}".format(
                    self.cdn_bucket, self.cdn_file))
                self.upload_archive()
                remove(self.output_zip_file)
                App.logger.debug("Uploaded")
                success = True
            else:
                self.log.error('Resource {0} currently not supported.'.format(
                    self.resource))
        except Exception as e:
            self.log.error('Conversion process ended abnormally: {0}'.format(
                str(e)))
            App.logger.error('{0}: {1}'.format(str(e), traceback.format_exc()))

        results = {
            'identifier': self.identifier,
            'success': success and len(self.log.logs['error']) == 0,
            'info': self.log.logs['info'],
            'warnings': self.log.logs['warning'],
            'errors': self.log.logs['error']
        }

        if self.callback is not None:
            self.callback_results = results
            self.do_callback(self.callback, self.callback_results)

        App.logger.debug(results)
        return results
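
The do_callback helper used above is not shown in this listing. Below is a minimal sketch of what it might look like, assuming it simply POSTs the results dictionary as JSON to the callback URL; the use of the requests library is an assumption, not part of the original code.

    import requests  # assumption: requests is the HTTP client in use

    def do_callback(self, url, payload):
        """
        Hypothetical sketch: report the conversion results back to the
        callback URL as JSON and log the response status.
        """
        response = requests.post(url, json=payload, timeout=30)
        App.logger.debug('Callback to {0} returned status {1}'.format(
            url, response.status_code))
        return response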
Example #2
    def test_add_contents_to_zip(self):
        self.tmp_dir1 = tempfile.mkdtemp()
        zip_file = os.path.join(self.tmp_dir1, 'foo.zip')

        self.tmp_dir2 = tempfile.mkdtemp()
        tmp_file = os.path.join(self.tmp_dir2, 'foo.txt')
        with open(tmp_file, "w") as tmpf:
            tmpf.write("hello world")

        with zipfile.ZipFile(zip_file, "w"):
            pass  # create empty archive
        file_utils.add_contents_to_zip(zip_file, self.tmp_dir2)

        with zipfile.ZipFile(zip_file, "r") as zf:
            with zf.open(os.path.relpath(tmp_file, self.tmp_dir2), "r") as f:
                self.assertEqual(f.read().decode("ascii"), "hello world")
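
The implementation of add_contents_to_zip itself is not part of this listing. The following is a minimal sketch consistent with the test above, assuming the helper opens the archive in append mode and stores each file under its path relative to the source directory.

    import os
    import zipfile

    def add_contents_to_zip(zip_file, path):
        """
        Sketch: add every file under path to zip_file, preserving paths
        relative to the given directory. Append mode keeps any entries
        already in the archive and creates the file if it is missing.
        """
        with zipfile.ZipFile(zip_file, 'a') as zf:
            for root, dirs, files in os.walk(path):
                for name in files:
                    file_path = os.path.join(root, name)
                    zf.write(file_path, os.path.relpath(file_path, path))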
Example #4
    def create_new_zip(self, out_dir):
        new_zip = tempfile.mktemp(prefix="linter", suffix='.zip', dir=self.temp_dir)
        add_contents_to_zip(new_zip, out_dir)
        return new_zip
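
Note that tempfile.mktemp only reserves a unique path without creating the file, so the archive comes into existence when add_contents_to_zip writes it. A short usage sketch; the linter instance and output directory are illustrative, not from the original code:

    # Hypothetical usage: package the linter's output directory
    out_dir = '/tmp/linter_output'          # illustrative path
    new_zip = linter.create_new_zip(out_dir)
    print('Created archive at {0}'.format(new_zip))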
Example #5
    def process_webhook(self):
        # Check that we got commit data
        if not self.commit_data:
            raise Exception('No commit data from DCS was found in the Payload')

        # Check that the user token is valid
        if not App.gogs_user_token:
            raise Exception('DCS user token not given in Payload.')
        user = App.gogs_handler().get_user(App.gogs_user_token)
        if not user:
            raise Exception('Invalid DCS user token given in Payload')

        # Check that the URL to the DCS repo is valid
        if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
            raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

        # Check that commit is on repo's default branch, else quit
        try:
            commit_branch = self.commit_data['ref'].split('/')[2]
        except IndexError:
            raise Exception('Could not determine commit branch, exiting.')
        except KeyError:
            raise Exception('This does not appear to be a push, exiting.')
        if commit_branch != self.commit_data['repository']['default_branch']:
            raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

        # Get the commit_id, commit_url
        commit_id = self.commit_data['after']
        commit = None
        for commit in self.commit_data['commits']:
            if commit['id'] == commit_id:
                break
        commit_id = commit_id[:10]  # Only use the short form
        commit_url = commit['url']

        # Gather other details from the commit that we will note for the job(s)
        user_name = self.commit_data['repository']['owner']['username']
        repo_name = self.commit_data['repository']['name']
        compare_url = self.commit_data['compare_url']
        commit_message = commit['message']

        if 'pusher' in self.commit_data:
            pusher = self.commit_data['pusher']
        else:
            pusher = {'username': commit['author']['username']}
        pusher_username = pusher['username']

        # Download and unzip the repo files
        repo_dir = self.get_repo_files(commit_url, repo_name)

        # Get the resource container
        rc = RC(repo_dir, repo_name)

        # Save manifest to manifest table
        manifest_data = {
            'repo_name': repo_name,
            'user_name': user_name,
            'lang_code': rc.resource.language.identifier,
            'resource_id': rc.resource.identifier,
            'resource_type': rc.resource.type,
            'title': rc.resource.title,
            'manifest': json.dumps(rc.as_dict()),
            'last_updated': datetime.utcnow()
        }
        # First see if manifest already exists in DB and update it if it is
        tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
        if tx_manifest:
            for key, value in manifest_data.items():
                setattr(tx_manifest, key, value)
            App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
            tx_manifest.update()
        else:
            tx_manifest = TxManifest(**manifest_data)
            App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
            tx_manifest.insert()

        # Preprocess the files
        preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
        results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

        # Zip up the massaged files
        zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
        App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
        add_contents_to_zip(zip_filepath, preprocess_dir)
        App.logger.debug('finished.')

        # Upload zipped file to the S3 bucket
        file_key = self.upload_zip_file(commit_id, zip_filepath)

        job = TxJob()
        job.job_id = self.get_unique_job_id()
        job.identifier = job.job_id
        job.user_name = user_name
        job.repo_name = repo_name
        job.commit_id = commit_id
        job.manifests_id = tx_manifest.id
        job.created_at = datetime.utcnow()
        job.user = user.username  # Username of the token, not necessarily the repo's owner
        job.input_format = rc.resource.file_ext
        job.resource_type = rc.resource.identifier
        job.source = self.source_url_base + "/" + file_key
        job.cdn_bucket = App.cdn_bucket
        job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
        job.output = 'https://{0}/{1}'.format(App.cdn_bucket, job.cdn_file)
        job.callback = App.api_url + '/client/callback'
        job.output_format = 'html'
        job.links = {
            "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
            "rel": "self",
            "method": "GET"
        }
        job.success = False

        converter = self.get_converter_module(job)
        linter = self.get_linter_module(job)

        if converter:
            job.convert_module = converter.name
            job.started_at = datetime.utcnow()
            job.expires_at = job.started_at + timedelta(days=1)
            job.eta = job.started_at + timedelta(minutes=5)
            job.status = 'started'
            job.message = 'Conversion started...'
            job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
        else:
            job.error_message('No converter was found to convert {0} from {1} to {2}'.format(job.resource_type,
                                                                                             job.input_format,
                                                                                             job.output_format))
            job.message = 'No converter found'
            job.status = 'failed'

        if linter:
            job.lint_module = linter.name
        else:
            App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

        job.insert()

        # Get S3 bucket/dir ready
        s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
        self.clear_commit_directory_in_cdn(s3_commit_key)

        # Create a build log
        build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                               pusher_username, repo_name, user_name)
        # Upload an initial build_log
        self.upload_build_log_to_s3(build_log_json, s3_commit_key)

        # Update the project.json file
        self.update_project_json(commit_id, job, repo_name, user_name)

        # Convert and lint
        if converter:
            if not preprocessor.is_multiple_jobs():
                self.send_request_to_converter(job, converter)
                if linter:
                    extra_payload = {
                        's3_results_key': s3_commit_key
                    }
                    self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
            else:
                # -----------------------------
                # multiple book project
                # -----------------------------
                books = preprocessor.get_book_list()
                App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
                book_count = len(books)
                build_log_json['multiple'] = True
                build_log_json['build_logs'] = []
                for i, book in enumerate(books):
                    App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))
                    # Send job request to tx-manager
                    if i == 0:
                        book_job = job  # use the original job created above for the first book
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                    else:
                        book_job = job.clone()  # copy the original job for this book's job
                        book_job.job_id = self.get_unique_job_id()
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                        book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                        book_job.output = 'https://{0}/{1}'.format(App.cdn_bucket, book_job.cdn_file)
                        book_job.links = {
                            "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                            "rel": "self",
                            "method": "GET"
                        }
                        book_job.insert()

                    book_job.source = self.build_multipart_source(file_key, book)
                    book_job.update()
                    book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url, book_job,
                                                           pusher_username, repo_name, user_name)
                    if len(book) > 0:
                        part = str(i)
                        book_build_log['book'] = book
                        book_build_log['part'] = part
                    build_log_json['build_logs'].append(book_build_log)
                    self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                    self.send_request_to_converter(book_job, converter)
                    if linter:
                        extra_payload = {
                            'single_file': book,
                            's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                        }
                        self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

        remove_tree(self.base_temp_dir)  # cleanup
        return build_log_json
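
For multiple-book projects the loop above packs the job id, part count, part index, and book name into a single identifier of the form '{job_id}/{count}/{index}/{book}'. A hypothetical helper for the receiving side that splits such an identifier back into its fields (not part of the original code):

    def split_multipart_identifier(identifier):
        """
        Hypothetical sketch: decompose '{job_id}/{count}/{index}/{book}'
        into its four fields. A plain identifier without separators is
        returned with the part fields set to None.
        """
        parts = identifier.split('/', 3)
        if len(parts) == 4:
            job_id, count, index, book = parts
            return job_id, int(count), int(index), book
        return identifier, None, None, None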
Example #6
    def process_webhook(self):
        # Check that we got commit data
        if not self.commit_data:
            raise Exception('No commit data from DCS was found in the Payload')

        # Check that the user token is valid
        if not App.gogs_user_token:
            raise Exception('DCS user token not given in Payload.')
        user = App.gogs_handler().get_user(App.gogs_user_token)
        if not user:
            raise Exception('Invalid DCS user token given in Payload')

        # Check that the URL to the DCS repo is valid
        if not self.commit_data['repository']['html_url'].startswith(App.gogs_url):
            raise Exception('Repos can only belong to {0} to use this webhook client.'.format(App.gogs_url))

        # Check that commit is on repo's default branch, else quit
        try:
            commit_branch = self.commit_data['ref'].split('/')[2]
        except IndexError:
            raise Exception('Could not determine commit branch, exiting.')
        except KeyError:
            raise Exception('This does not appear to be a push, exiting.')
        if commit_branch != self.commit_data['repository']['default_branch']:
            raise Exception('Commit branch: {0} is not the default branch, exiting.'.format(commit_branch))

        # Get the commit_id, commit_url
        commit_id = self.commit_data['after']
        commit = None
        for commit in self.commit_data['commits']:
            if commit['id'] == commit_id:
                break
        commit_id = commit_id[:10]  # Only use the short form
        commit_url = commit['url']

        # Gather other details from the commit that we will note for the job(s)
        user_name = self.commit_data['repository']['owner']['username']
        repo_name = self.commit_data['repository']['name']
        compare_url = self.commit_data['compare_url']
        commit_message = commit['message']

        if 'pusher' in self.commit_data:
            pusher = self.commit_data['pusher']
        else:
            pusher = {'username': commit['author']['username']}
        pusher_username = pusher['username']

        # Download and unzip the repo files
        repo_dir = self.get_repo_files(commit_url, repo_name)

        # Get the resource container
        rc = RC(repo_dir, repo_name)

        # Save manifest to manifest table
        manifest_data = {
            'repo_name': repo_name,
            'user_name': user_name,
            'lang_code': rc.resource.language.identifier,
            'resource_id': rc.resource.identifier,
            'resource_type': rc.resource.type,
            'title': rc.resource.title,
            'manifest': json.dumps(rc.as_dict()),
            'last_updated': datetime.utcnow()
        }
        # First see if manifest already exists in DB and update it if it is
        tx_manifest = TxManifest.get(repo_name=repo_name, user_name=user_name)
        if tx_manifest:
            for key, value in manifest_data.items():
                setattr(tx_manifest, key, value)
            App.logger.debug('Updating manifest in manifest table: {0}'.format(manifest_data))
            tx_manifest.update()
        else:
            tx_manifest = TxManifest(**manifest_data)
            App.logger.debug('Inserting manifest into manifest table: {0}'.format(tx_manifest))
            tx_manifest.insert()

        # Preprocess the files
        preprocess_dir = tempfile.mkdtemp(dir=self.base_temp_dir, prefix='preprocess_')
        results, preprocessor = do_preprocess(rc, repo_dir, preprocess_dir)

        # Zip up the massaged files
        zip_filepath = tempfile.mktemp(dir=self.base_temp_dir, suffix='.zip')
        App.logger.debug('Zipping files from {0} to {1}...'.format(preprocess_dir, zip_filepath))
        add_contents_to_zip(zip_filepath, preprocess_dir)
        App.logger.debug('finished.')

        # Upload zipped file to the S3 bucket
        file_key = self.upload_zip_file(commit_id, zip_filepath)

        job = TxJob()
        job.job_id = self.get_unique_job_id()
        job.identifier = job.job_id
        job.user_name = user_name
        job.repo_name = repo_name
        job.commit_id = commit_id
        job.manifests_id = tx_manifest.id
        job.created_at = datetime.utcnow()
        job.user = user.username  # Username of the token, not necessarily the repo's owner
        job.input_format = rc.resource.file_ext
        job.resource_type = rc.resource.identifier
        job.source = self.source_url_base + "/" + file_key
        job.cdn_bucket = App.cdn_bucket
        job.cdn_file = 'tx/job/{0}.zip'.format(job.job_id)
        job.output = 'http://{0}.s3-{1}.amazonaws.com/{2}'.format(App.cdn_bucket, App.aws_region_name, job.cdn_file)
        job.callback = App.api_url + '/client/callback'
        job.output_format = 'html'
        job.links = {
            "href": "{0}/tx/job/{1}".format(App.api_url, job.job_id),
            "rel": "self",
            "method": "GET"
        }
        job.success = False

        converter = self.get_converter_module(job)
        linter = self.get_linter_module(job)

        if converter:
            job.convert_module = converter.name
            job.started_at = datetime.utcnow()
            job.expires_at = job.started_at + timedelta(days=1)
            job.eta = job.started_at + timedelta(minutes=5)
            job.status = 'started'
            job.message = 'Conversion started...'
            job.log_message('Started job for {0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id))
        else:
            job.error_message('No converter was found to convert {0} from {1} to {2}'.format(job.resource_type,
                                                                                             job.input_format,
                                                                                             job.output_format))
            job.message = 'No converter found'
            job.status = 'failed'

        if linter:
            job.lint_module = linter.name
        else:
            App.logger.debug('No linter was found to lint {0}'.format(job.resource_type))

        job.insert()

        # Get S3 bucket/dir ready
        s3_commit_key = 'u/{0}/{1}/{2}'.format(job.user_name, job.repo_name, job.commit_id)
        self.clear_commit_directory_in_cdn(s3_commit_key)

        # Create a build log
        build_log_json = self.create_build_log(commit_id, commit_message, commit_url, compare_url, job,
                                               pusher_username, repo_name, user_name)
        # Upload an initial build_log
        self.upload_build_log_to_s3(build_log_json, s3_commit_key)

        # Update the project.json file
        self.update_project_json(commit_id, job, repo_name, user_name)

        # Convert and lint
        if converter:
            if not preprocessor.is_multiple_jobs():
                self.send_request_to_converter(job, converter)
                if linter:
                    extra_payload = {
                        's3_results_key': s3_commit_key
                    }
                    self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
            else:
                # -----------------------------
                # multiple book project
                # -----------------------------
                books = preprocessor.get_book_list()
                App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
                book_count = len(books)
                build_log_json['multiple'] = True
                build_log_json['build_logs'] = []
                for i, book in enumerate(books):
                    App.logger.debug('Adding job for {0}, part {1} of {2}'.format(book, i, book_count))
                    # Send job request to tx-manager
                    if i == 0:
                        book_job = job  # use the original job created above for the first book
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(job.job_id, book_count, i, book)
                    else:
                        book_job = job.clone()  # copy the original job for this book's job
                        book_job.job_id = self.get_unique_job_id()
                        book_job.identifier = '{0}/{1}/{2}/{3}'.format(book_job.job_id, book_count, i, book)
                        book_job.cdn_file = 'tx/job/{0}.zip'.format(book_job.job_id)
                        book_job.output = 'http://{0}.s3-{1}.amazonaws.com/{2}'.format(App.cdn_bucket, App.aws_region_name, book_job.cdn_file)
                        book_job.links = {
                            "href": "{0}/tx/job/{1}".format(App.api_url, book_job.job_id),
                            "rel": "self",
                            "method": "GET"
                        }
                        book_job.insert()

                    book_job.source = self.build_multipart_source(file_key, book)
                    book_job.update()
                    book_build_log = self.create_build_log(commit_id, commit_message, commit_url, compare_url, book_job,
                                                           pusher_username, repo_name, user_name)
                    if len(book) > 0:
                        part = str(i)
                        book_build_log['book'] = book
                        book_build_log['part'] = part
                    build_log_json['build_logs'].append(book_build_log)
                    self.upload_build_log_to_s3(book_build_log, s3_commit_key, str(i) + "/")
                    self.send_request_to_converter(book_job, converter)
                    if linter:
                        extra_payload = {
                            'single_file': book,
                            's3_results_key': '{0}/{1}'.format(s3_commit_key, i)
                        }
                        self.send_request_to_linter(book_job, linter, commit_url, extra_payload)

        remove_tree(self.base_temp_dir)  # cleanup
        return build_log_json
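
upload_build_log_to_s3 is not shown in either example. A minimal sketch, assuming boto3 is the S3 client and the log is stored as build_log.json under the commit key; the bucket attribute and key layout are assumptions inferred from the calls above:

    import json
    import boto3  # assumption: boto3 is the S3 client in use

    def upload_build_log_to_s3(self, build_log, s3_commit_key, part=''):
        """
        Hypothetical sketch: serialize the build log dict and store it as
        build_log.json under the commit's S3 key, optionally inside a part
        subdirectory as the multipart loop above does.
        """
        key = '{0}/{1}build_log.json'.format(s3_commit_key, part)
        boto3.client('s3').put_object(
            Bucket=App.cdn_bucket,    # assumed bucket attribute
            Key=key,
            Body=json.dumps(build_log),
            ContentType='application/json')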
Example #7
    def create_new_zip(self, out_dir):
        new_zip = tempfile.mktemp(prefix="linter",
                                  suffix='.zip',
                                  dir=self.temp_dir)
        add_contents_to_zip(new_zip, out_dir)
        return new_zip
Example #8
    def createZipFile(self, zip_filename, destination_folder, source_folder):
        zip_filepath = os.path.join(destination_folder, zip_filename)
        add_contents_to_zip(zip_filepath, source_folder)
        return zip_filepath
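
A short usage sketch for createZipFile; the instance and folder paths are illustrative:

    # Hypothetical usage: zip the contents of source_folder into
    # destination_folder/backup.zip and get the resulting path back.
    zip_path = client.createZipFile('backup.zip', '/tmp/out', '/tmp/source')
    print(zip_path)  # -> /tmp/out/backup.zip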