def create_new_zip(self, out_dir):
    """Create a fresh zip file in ``self.temp_dir`` containing *out_dir*.

    Returns the path of the new zip file. The file is created with
    ``delete=False`` so it survives this call; the caller is responsible
    for removing it when done.
    """
    # Use the file object as a context manager so the OS-level handle is
    # closed immediately; grabbing only ``.name`` left the descriptor open
    # until garbage collection (fd leak).
    with tempfile.NamedTemporaryFile(prefix='linter',
                                     suffix='.zip',
                                     dir=self.temp_dir,
                                     delete=False) as tmp:
        new_zip = tmp.name
    add_contents_to_zip(new_zip, out_dir)
    return new_zip
# Example #2 (pasted code-sample separator)
    def test_add_contents_to_zip(self):
        """add_contents_to_zip packs a directory's files into an existing zip."""
        # Directories are stored on self so the fixture teardown can clean them up.
        self.tmp_dir1 = tempfile.mkdtemp(prefix='Door43_test_file_utils_')
        zip_path = os.path.join(self.tmp_dir1, 'foo.zip')

        self.tmp_dir2 = tempfile.mkdtemp(prefix='Door43_test_file_utils_')
        text_path = os.path.join(self.tmp_dir2, 'foo.txt')
        with open(text_path, "w") as out:
            out.write("hello world")

        # Start from an empty archive, then add the directory contents to it.
        zipfile.ZipFile(zip_path, "w").close()
        file_utils.add_contents_to_zip(zip_path, self.tmp_dir2)

        # The member name inside the archive is the path relative to tmp_dir2.
        member = os.path.relpath(text_path, self.tmp_dir2)
        with zipfile.ZipFile(zip_path, "r") as archive:
            self.assertEqual(archive.read(member).decode("ascii"), "hello world")
# Example #3 (pasted code-sample separator)
    def test_add_contents_to_zip(self):
        """add_contents_to_zip packs a directory's files into an existing zip."""
        work_dir = tempfile.mkdtemp()
        zip_path = work_dir + "/foo.zip"

        src_dir = tempfile.mkdtemp()
        src_file = src_dir + "/foo.txt"
        with open(src_file, "w") as out:
            out.write("hello world")

        # Create an empty archive first, then fill it from src_dir.
        zipfile.ZipFile(zip_path, "w").close()
        file_utils.add_contents_to_zip(zip_path, src_dir)

        # The member name inside the archive is the path relative to src_dir.
        member = os.path.relpath(src_file, src_dir)
        with zipfile.ZipFile(zip_path, "r") as archive:
            self.assertEqual(archive.read(member).decode("ascii"), "hello world")
# Example #4 (pasted code-sample separator)
    def run(self) -> Dict[str, Any]:
        """Run the conversion pipeline and return a results dict.

        Converts the files in ``self.source_dir`` via ``self.convert()``,
        zips the converted output, optionally uploads the archive (when
        ``self.cdn_file_key`` is set), and returns a dict with the job
        identifier, overall success flag, and the collected
        info/warning/error log lists.
        """
        success = False
        if os.path.isdir(self.source_dir):
            self.files_dir = self.source_dir  # TODO: This can be cleaned up later
            try:
                AppSettings.logger.debug(
                    f"Converting files from {self.files_dir}…")
                if self.convert():
                    # Zip the output dir to the output archive
                    add_contents_to_zip(self.output_zip_file, self.output_dir)
                    # Upload the output archive to cdn_bucket, if requested
                    if self.cdn_file_key:
                        # Log inside the branch: previously this fired even
                        # with no cdn_file_key, claiming an upload "to None".
                        AppSettings.logger.info(
                            f"Converter uploading output archive to {self.cdn_file_key} …"
                        )
                        self.upload_archive()
                        AppSettings.logger.debug(
                            f"Uploaded converted files (using '{self.cdn_file_key}')."
                        )
                    else:
                        AppSettings.logger.debug(
                            "No converted file upload requested.")
                    remove_file(self.output_zip_file)
                    success = True
                else:
                    self.log.error(
                        f"Resource type '{self.repo_subject}' currently not supported."
                    )
            except Exception as e:
                # Record the failure in the job log and keep the full
                # traceback in the debug log.
                self.log.error(f"Conversion process ended abnormally: {e}")
                AppSettings.logger.debug(
                    f"Converter failure: {traceback.format_exc()}")

        results = {
            'identifier': self.identifier,
            # Only report success if no errors were logged along the way.
            'success': success and len(self.log.logs['error']) == 0,
            'info': self.log.logs['info'],
            'warnings': self.log.logs['warning'],
            'errors': self.log.logs['error']
        }
        return results
# Example #5 (pasted code-sample separator)
def handle(event, context):
    """Gogs webhook client Lambda entry point.

    Downloads the pushed repo, preprocesses its files, zips and uploads the
    result to the pre-convert S3 bucket, requests a conversion job from
    tx-manager, and uploads build_log.json (and manifest.json) to the CDN
    bucket. Returns the job's build-log dict.

    Raises an Exception whose text contains 'Bad Request' so API Gateway
    maps failures to an HTTP 400 response.
    """
    # Get vars and data
    env_vars = retrieve(event, 'vars', 'payload')
    api_url = retrieve(env_vars, 'api_url', 'Environment Vars')
    pre_convert_bucket = retrieve(env_vars, 'pre_convert_bucket',
                                  'Environment Vars')
    cdn_bucket = retrieve(env_vars, 'cdn_bucket', 'Environment Vars')
    gogs_url = retrieve(env_vars, 'gogs_url', 'Environment Vars')
    gogs_user_token = retrieve(env_vars, 'gogs_user_token', 'Environment Vars')
    repo_commit = retrieve(event, 'data', 'payload')

    # Find the commit record matching the push's 'after' id; if none
    # matches, `commit` is left as the last entry in the list.
    commit_id = repo_commit['after']
    commit = None
    for commit in repo_commit['commits']:
        if commit['id'] == commit_id:
            break

    commit_url = commit['url']
    commit_message = commit['message']

    if gogs_url not in commit_url:
        raise Exception(
            'Repos can only belong to {0} to use this webhook client.'.format(
                gogs_url))

    repo_name = repo_commit['repository']['name']
    repo_owner = repo_commit['repository']['owner']['username']
    compare_url = repo_commit['compare_url']

    # Some payloads lack a 'pusher'; fall back to the commit author.
    if 'pusher' in repo_commit:
        pusher = repo_commit['pusher']
    else:
        pusher = {'username': commit['author']['username']}
    pusher_username = pusher['username']

    # 1) Download and unzip the repo files
    temp_dir = tempfile.mkdtemp(prefix='repo_')
    download_repo(commit_url, temp_dir)
    repo_dir = os.path.join(temp_dir, repo_name)
    if not os.path.isdir(repo_dir):
        repo_dir = temp_dir

    # 2) Get the manifest file or make one if it doesn't exist based on meta.json, repo_name and file extensions
    manifest_path = os.path.join(repo_dir, 'manifest.json')
    if not os.path.isfile(manifest_path):
        manifest_path = os.path.join(repo_dir, 'project.json')
        if not os.path.isfile(manifest_path):
            manifest_path = None
    meta_path = os.path.join(repo_dir, 'meta.json')
    meta = None
    if os.path.isfile(meta_path):
        meta = MetaData(meta_path)
    manifest = Manifest(file_name=manifest_path,
                        repo_name=repo_name,
                        files_path=repo_dir,
                        meta=meta)

    # determining the repo compiler:
    generator = ''
    if manifest.generator and manifest.generator[
            'name'] and manifest.generator['name'].startswith('ts'):
        generator = 'ts'
    if not generator:
        # Non-ts repos may keep their sources in a 'content' or 'usfm' subdir.
        dirs = sorted(get_subdirs(repo_dir, True))
        if 'content' in dirs:
            repo_dir = os.path.join(repo_dir, 'content')
        elif 'usfm' in dirs:
            repo_dir = os.path.join(repo_dir, 'usfm')

    manifest_path = os.path.join(repo_dir, 'manifest.json')
    write_file(manifest_path, manifest.__dict__
               )  # Write it back out so it's using the latest manifest format

    input_format = manifest.format
    resource_type = manifest.resource['id']
    if resource_type == 'ulb' or resource_type == 'udb':
        resource_type = 'bible'

    print(generator)
    print(input_format)
    print(manifest.__dict__)
    try:
        # Resolve the preprocessor class by name, e.g. 'TsBibleUsfmPreprocessor'.
        compiler_class = str_to_class(
            'preprocessors.{0}{1}{2}Preprocessor'.format(
                generator.capitalize(), resource_type.capitalize(),
                input_format.capitalize()))
    except AttributeError as e:
        # Use the exception itself, not `e.message`: Python 3 exceptions have
        # no .message attribute, and the AttributeError raised by accessing it
        # would escape this handler and abort the fallback.
        print('Got AE: {0}'.format(e))
        compiler_class = preprocessors.Preprocessor

    print(compiler_class)

    # merge the source files with the template
    output_dir = tempfile.mkdtemp(prefix='output_')
    compiler = compiler_class(manifest, repo_dir, output_dir)
    compiler.run()

    # 3) Zip up the massaged files
    zip_filename = context.aws_request_id + '.zip'  # context.aws_request_id is a unique ID for this lambda call, so using it to not conflict with other requests
    zip_filepath = os.path.join(tempfile.gettempdir(), zip_filename)
    print('Zipping files from {0} to {1}...'.format(output_dir, zip_filepath))
    add_contents_to_zip(zip_filepath, output_dir)
    if os.path.isfile(manifest_path) and not os.path.isfile(
            os.path.join(output_dir, 'manifest.json')):
        add_file_to_zip(zip_filepath, manifest_path, 'manifest.json')
    print('finished.')

    # 4) Upload zipped file to the S3 bucket (you may want to do some try/catch and give an error if fails back to Gogs)
    s3_handler = S3Handler(pre_convert_bucket)
    file_key = "preconvert/" + zip_filename
    print('Uploading {0} to {1}/{2}...'.format(zip_filepath,
                                               pre_convert_bucket, file_key))
    s3_handler.upload_file(zip_filepath, file_key)
    print('finished.')

    # Send job request to tx-manager
    source_url = 'https://s3-us-west-2.amazonaws.com/{0}/{1}'.format(
        pre_convert_bucket, file_key)  # we use us-west-2 for our s3 buckets
    tx_manager_job_url = api_url + '/tx/job'
    identifier = "{0}/{1}/{2}".format(
        repo_owner, repo_name, commit_id[:10]
    )  # The way to know which repo/commit goes to this job request
    if input_format == 'markdown':
        input_format = 'md'
    payload = {
        "identifier": identifier,
        "user_token": gogs_user_token,
        "resource_type": manifest.resource['id'],
        "input_format": input_format,
        "output_format": "html",
        "source": source_url,
        "callback": api_url + '/client/callback'
    }
    headers = {"content-type": "application/json"}

    print('Making request to tx-Manager URL {0} with payload:'.format(
        tx_manager_job_url))
    print(payload)
    print('...')
    response = requests.post(tx_manager_job_url, json=payload, headers=headers)
    print('finished.')

    # for testing
    print('tx-manager response:')
    print(response)

    # A requests.Response is falsy for HTTP error status codes.
    if not response:
        raise Exception('Bad request: unable to convert')

    # NOTE: the old `if 'errorMessage' in response:` check was dead code — it
    # iterated the Response's byte chunks and could never match a str. The
    # parsed-JSON check below performs the intended test.
    json_data = json.loads(response.text)

    if 'errorMessage' in json_data:
        raise Exception('Bad request: {0}'.format(json_data['errorMessage']))

    if 'job' not in json_data:
        raise Exception(
            'Bad request: tX Manager did not return any info about the job request.'
        )
    build_log_json = json_data['job']

    # Annotate the job record with repo/commit info for later processing.
    build_log_json['repo_name'] = repo_name
    build_log_json['repo_owner'] = repo_owner
    build_log_json['commit_id'] = commit_id
    build_log_json['committed_by'] = pusher_username
    build_log_json['commit_url'] = commit_url
    build_log_json['compare_url'] = compare_url
    build_log_json['commit_message'] = commit_message

    # Unreachable in practice (an errorMessage raised above), kept for parity
    # with the error path used elsewhere in this module.
    if 'errorMessage' in json_data:
        build_log_json['status'] = 'failed'
        build_log_json['message'] = json_data['errorMessage']

    # Upload files to S3:

    # S3 location vars
    cdn_handler = S3Handler(cdn_bucket)
    s3_commit_key = 'u/{0}'.format(identifier)

    # Remove everything in the bucket with the s3_commit_key prefix so old files are removed, if any
    for obj in cdn_handler.get_objects(prefix=s3_commit_key):
        cdn_handler.delete_file(obj.key)

    # Make a build_log.json file with this repo and commit data for later processing, upload to S3
    build_log_file = os.path.join(tempfile.gettempdir(),
                                  'build_log_request.json')
    write_file(build_log_file, build_log_json)
    cdn_handler.upload_file(build_log_file, s3_commit_key + '/build_log.json',
                            0)

    # Upload the manifest.json file to the cdn_bucket if it exists
    if os.path.isfile(manifest_path):
        cdn_handler.upload_file(manifest_path,
                                s3_commit_key + '/manifest.json', 0)

    # If there was an error, in order to trigger a 400 error in the API Gateway, we need to raise an
    # exception with the returned 'errorMessage' because the API Gateway needs to see 'Bad Request:' in the string
    if 'errorMessage' in json_data:
        raise Exception('Bad Request: {0}'.format(json_data['errorMessage']))

    return build_log_json
# Example #6 (pasted code-sample separator)
def handle(event, context):
    """Gogs webhook client Lambda entry point (error-tolerant variant).

    Downloads the pushed repo, preprocesses its files, zips and uploads the
    result to the pre-convert S3 bucket, requests a conversion job from
    tx-manager, updates the repo's project.json, and uploads build_log.json
    and manifest.json to the CDN bucket. Unlike the simpler variant, a
    failed tx-manager request still produces a build log ('fake job').

    Returns the build-log dict on success; any failure is re-raised as an
    Exception whose text starts with 'Bad Request:' so API Gateway maps it
    to an HTTP 400 response.
    """
    try:
        # Get vars and data
        env_vars = retrieve(event, 'vars', 'payload')
        api_url = retrieve(env_vars, 'api_url', 'Environment Vars')
        pre_convert_bucket = retrieve(env_vars, 'pre_convert_bucket',
                                      'Environment Vars')
        cdn_bucket = retrieve(env_vars, 'cdn_bucket', 'Environment Vars')
        gogs_url = retrieve(env_vars, 'gogs_url', 'Environment Vars')
        gogs_user_token = retrieve(env_vars, 'gogs_user_token',
                                   'Environment Vars')
        repo_commit = retrieve(event, 'data', 'payload')

        # Find the commit record matching the push's 'after' id; if none
        # matches, `commit` is left as the last entry in the list.
        commit_id = repo_commit['after']
        commit = None
        for commit in repo_commit['commits']:
            if commit['id'] == commit_id:
                break
        commit_id = commit_id[:10]  # Only use the short form

        commit_url = commit['url']
        commit_message = commit['message']

        if gogs_url not in commit_url:
            raise Exception(
                'Repos can only belong to {0} to use this webhook client.'.
                format(gogs_url))

        repo_name = repo_commit['repository']['name']
        repo_owner = repo_commit['repository']['owner']['username']
        compare_url = repo_commit['compare_url']

        # Some payloads lack a 'pusher'; fall back to the commit author.
        if 'pusher' in repo_commit:
            pusher = repo_commit['pusher']
        else:
            pusher = {'username': commit['author']['username']}
        pusher_username = pusher['username']

        # 1) Download and unzip the repo files
        temp_dir = tempfile.mkdtemp(prefix='repo_')
        download_repo(commit_url, temp_dir)
        repo_dir = os.path.join(temp_dir, repo_name)
        if not os.path.isdir(repo_dir):
            repo_dir = temp_dir

        # 2) Get the manifest file or make one if it doesn't exist based on meta.json, repo_name and file extensions
        manifest_path = os.path.join(repo_dir, 'manifest.json')
        if not os.path.isfile(manifest_path):
            manifest_path = os.path.join(repo_dir, 'project.json')
            if not os.path.isfile(manifest_path):
                manifest_path = None
        meta_path = os.path.join(repo_dir, 'meta.json')
        meta = None
        if os.path.isfile(meta_path):
            meta = MetaData(meta_path)
        manifest = Manifest(file_name=manifest_path,
                            repo_name=repo_name,
                            files_path=repo_dir,
                            meta=meta)

        # determining the repo compiler:
        generator = ''
        if manifest.generator and manifest.generator[
                'name'] and manifest.generator['name'].startswith('ts'):
            generator = 'ts'
        if not generator:
            # Non-ts repos may keep their sources in a 'content' or 'usfm' subdir.
            dirs = sorted(get_subdirs(repo_dir, True))
            if 'content' in dirs:
                repo_dir = os.path.join(repo_dir, 'content')
            elif 'usfm' in dirs:
                repo_dir = os.path.join(repo_dir, 'usfm')

        manifest_path = os.path.join(repo_dir, 'manifest.json')
        write_file(
            manifest_path, manifest.__dict__
        )  # Write it back out so it's using the latest manifest format

        input_format = manifest.format
        resource_type = manifest.resource['id']
        if resource_type == 'ulb' or resource_type == 'udb':
            resource_type = 'bible'

        print(generator)
        print(input_format)
        print(manifest.__dict__)
        try:
            # Resolve the preprocessor class by name, e.g. 'TsBibleUsfmPreprocessor'.
            compiler_class = str_to_class(
                'preprocessors.{0}{1}{2}Preprocessor'.format(
                    generator.capitalize(), resource_type.capitalize(),
                    input_format.capitalize()))
        except AttributeError as e:
            # Use the exception itself, not `e.message`: Python 3 exceptions
            # have no .message attribute, and the AttributeError raised by
            # accessing it would escape this handler and abort the fallback.
            print('Got AE: {0}'.format(e))
            compiler_class = preprocessors.Preprocessor

        print(compiler_class)

        # merge the source files with the template
        output_dir = tempfile.mkdtemp(prefix='output_')
        compiler = compiler_class(manifest, repo_dir, output_dir)
        compiler.run()

        # 3) Zip up the massaged files
        zip_filename = context.aws_request_id + '.zip'  # context.aws_request_id is a unique ID for this lambda call, so using it to not conflict with other requests
        zip_filepath = os.path.join(tempfile.gettempdir(), zip_filename)
        print('Zipping files from {0} to {1}...'.format(
            output_dir, zip_filepath))
        add_contents_to_zip(zip_filepath, output_dir)
        if os.path.isfile(manifest_path) and not os.path.isfile(
                os.path.join(output_dir, 'manifest.json')):
            add_file_to_zip(zip_filepath, manifest_path, 'manifest.json')
        print('finished.')

        # 4) Upload zipped file to the S3 bucket (you may want to do some try/catch and give an error if fails back to Gogs)
        s3_handler = S3Handler(pre_convert_bucket)
        file_key = "preconvert/" + zip_filename
        print('Uploading {0} to {1}/{2}...'.format(zip_filepath,
                                                   pre_convert_bucket,
                                                   file_key))
        s3_handler.upload_file(zip_filepath, file_key)
        print('finished.')

        # Send job request to tx-manager
        source_url = 'https://s3-us-west-2.amazonaws.com/{0}/{1}'.format(
            pre_convert_bucket,
            file_key)  # we use us-west-2 for our s3 buckets
        callback_url = api_url + '/client/callback'
        tx_manager_job_url = api_url + '/tx/job'
        identifier = "{0}/{1}/{2}".format(
            repo_owner, repo_name, commit_id
        )  # The way to know which repo/commit goes to this job request
        if input_format == 'markdown':
            input_format = 'md'
        payload = {
            "identifier": identifier,
            "user_token": gogs_user_token,
            "resource_type": manifest.resource['id'],
            "input_format": input_format,
            "output_format": "html",
            "source": source_url,
            "callback": callback_url
        }
        headers = {"content-type": "application/json"}

        print('Making request to tx-Manager URL {0} with payload:'.format(
            tx_manager_job_url))
        print(payload)
        response = requests.post(tx_manager_job_url,
                                 json=payload,
                                 headers=headers)
        print('finished.')

        # for testing
        print('tx-manager response:')
        print(response)
        print(response.status_code)

        # Fake job in case tx-manager returns an error, can still build the build_log.json
        job = {
            'job_id': None,
            'identifier': identifier,
            'resource_type': manifest.resource['id'],
            'input_format': input_format,
            'output_format': 'html',
            'source': source_url,
            'callback': callback_url,
            'message': 'Conversion started...',
            'status': 'requested',
            'success': None,
            'created_at': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
            'log': [],
            'warnings': [],
            'errors': []
        }

        if response.status_code != requests.codes.ok:
            job['status'] = 'failed'
            job['success'] = False
            job['message'] = 'Failed to convert!'
            # Best-effort extraction of the server's error message; any parse
            # failure just leaves the error string empty.
            error = ''
            if response.text:
                try:
                    json_data = json.loads(response.text)
                    if 'errorMessage' in json_data:
                        error = json_data['errorMessage']
                        if error.startswith('Bad Request: '):
                            error = error[len('Bad Request: '):]
                except Exception:
                    pass
            job['errors'].append(error)
        else:
            json_data = json.loads(response.text)

            if 'job' not in json_data:
                job['status'] = 'failed'
                job['success'] = False
                job['message'] = 'Failed to convert'
                job['errors'].append(
                    'tX Manager did not return any info about the job request.'
                )
            else:
                job = json_data['job']

        cdn_handler = S3Handler(cdn_bucket)

        # Download the project.json file for this repo (create it if doesn't exist) and update it
        project_json_key = 'u/{0}/{1}/project.json'.format(
            repo_owner, repo_name)
        project_json = cdn_handler.get_json(project_json_key)
        project_json['user'] = repo_owner
        project_json['repo'] = repo_name
        project_json['repo_url'] = 'https://git.door43.org/{0}/{1}'.format(
            repo_owner, repo_name)
        commit = {
            'id': commit_id,
            'created_at': job['created_at'],
            'status': job['status'],
            'success': job['success'],
            'started_at': None,
            'ended_at': None
        }
        # Replace any previous record of this commit, then append the new one.
        if 'commits' not in project_json:
            project_json['commits'] = []
        commits = []
        for c in project_json['commits']:
            if c['id'] != commit_id:
                commits.append(c)
        commits.append(commit)
        project_json['commits'] = commits
        project_file = os.path.join(tempfile.gettempdir(), 'project.json')
        write_file(project_file, project_json)
        cdn_handler.upload_file(project_file, project_json_key, 0)

        # Compile data for build_log.json
        build_log_json = job
        build_log_json['repo_name'] = repo_name
        build_log_json['repo_owner'] = repo_owner
        build_log_json['commit_id'] = commit_id
        build_log_json['committed_by'] = pusher_username
        build_log_json['commit_url'] = commit_url
        build_log_json['compare_url'] = compare_url
        build_log_json['commit_message'] = commit_message
        # Upload build_log.json and manifest.json to S3:
        s3_commit_key = 'u/{0}'.format(identifier)
        for obj in cdn_handler.get_objects(prefix=s3_commit_key):
            cdn_handler.delete_file(obj.key)
        build_log_file = os.path.join(tempfile.gettempdir(), 'build_log.json')
        write_file(build_log_file, build_log_json)
        cdn_handler.upload_file(build_log_file,
                                s3_commit_key + '/build_log.json', 0)

        cdn_handler.upload_file(manifest_path,
                                s3_commit_key + '/manifest.json', 0)

        if len(job['errors']) > 0:
            raise Exception('; '.join(job['errors']))
        else:
            return build_log_json
    except Exception as e:
        # Chain the original exception so its traceback is preserved for
        # debugging; API Gateway only sees the 'Bad Request:' prefix.
        raise Exception('Bad Request: {0}'.format(e)) from e
# Example #7 (pasted code-sample separator)
def handle(event, context):
    """Lambda entry point: convert an OBS resource and upload the zipped result.

    Expects ``event['data']['job']`` with 'source', 'resource_type',
    'cdn_bucket' and 'cdn_file' keys (plus optional 'options').

    Returns a dict with 'log', 'errors', 'warnings' and 'success'.
    Raises Exception when required payload keys are missing.
    """
    log = []
    errors = []
    warnings = []

    if 'data' not in event:
        raise Exception('"data" was not in payload')
    data = event['data']

    if 'job' not in data:
        raise Exception('"job" was not in payload')
    job = data['job']

    if 'source' not in job:
        raise Exception('"source" was not in "job"')
    source = job['source']

    if 'resource_type' not in job:
        raise Exception('"resource_type" was not in "job"')
    resource = job['resource_type']

    if 'cdn_bucket' not in job:
        raise Exception('"cdn_bucket" was not in "job"')
    cdn_bucket = job['cdn_bucket']

    if 'cdn_file' not in job:
        # Fixed error string: it was missing the closing quote around "job".
        raise Exception('"cdn_file" was not in "job"')
    cdn_file = job['cdn_file']

    print('source: ' + source)
    print('cdn_bucket: ' + cdn_bucket)
    print('cdn_file: ' + cdn_file)

    # Default converter options; the job may override/extend them.
    options = {
        'line_spacing': '120%'
    }

    if 'options' in job:
        options.update(job['options'])

    # aws_request_id is unique per invocation, so output dirs don't collide.
    output_dir = os.path.join(tempfile.gettempdir(), context.aws_request_id)

    success = False
    try:
        if resource == 'obs':
            converter = transform_obs.TransformOBS(source, output_dir, options)
            try:
                converter.run()
            except Exception as e:
                # str(e), not e.message: Python 3 exceptions have no
                # .message attribute — accessing it raised AttributeError
                # and masked the real conversion failure.
                error_message(errors, str(e) or 'Conversion process failed to run.')
            finally:
                # Collect the converter's own logs even on failure.
                log.extend(converter.log)
                errors.extend(converter.errors)
                warnings.extend(converter.warnings)
        # --- Add other resources here when implemented ---
        else:
            raise Exception('Resource "{0}" not currently supported'.format(resource))

        zip_file = os.path.join(tempfile.gettempdir(), context.aws_request_id + '.zip')
        add_contents_to_zip(zip_file, output_dir)
        log_message(log, "Uploading {0} to {1}/{2}".format(os.path.basename(zip_file), cdn_bucket, cdn_file))
        cdn_handler = S3Handler(cdn_bucket)
        cdn_handler.upload_file(zip_file, cdn_file)
        log_message(log, "Upload was successful.")
        success = True
    except Exception as e:
        # Same Python 3 fix as above; empty messages fall back to a default.
        error_message(errors, str(e) or 'Conversion process ended abnormally')

    return {
        'log': log,
        'errors': errors,
        'warnings': warnings,
        'success': success
    }