def create_new_zip(self, out_dir):
    new_zip = tempfile.NamedTemporaryFile(prefix='linter', suffix='.zip',
                                          dir=self.temp_dir, delete=False).name
    add_contents_to_zip(new_zip, out_dir)
    return new_zip
def test_add_contents_to_zip(self):
    self.tmp_dir1 = tempfile.mkdtemp(prefix='Door43_test_file_utils_')
    zip_file = os.path.join(self.tmp_dir1, 'foo.zip')
    self.tmp_dir2 = tempfile.mkdtemp(prefix='Door43_test_file_utils_')
    tmp_file = os.path.join(self.tmp_dir2, 'foo.txt')
    with open(tmp_file, "w") as tmpf:
        tmpf.write("hello world")
    with zipfile.ZipFile(zip_file, "w"):
        pass  # create empty archive
    file_utils.add_contents_to_zip(zip_file, self.tmp_dir2)
    with zipfile.ZipFile(zip_file, "r") as zf:
        with zf.open(os.path.relpath(tmp_file, self.tmp_dir2), "r") as f:
            self.assertEqual(f.read().decode("ascii"), "hello world")
def test_add_contents_to_zip(self):
    tmp_dir1 = tempfile.mkdtemp()
    zip_file = tmp_dir1 + "/foo.zip"
    tmp_dir2 = tempfile.mkdtemp()
    tmp_file = tmp_dir2 + "/foo.txt"
    with open(tmp_file, "w") as tmpf:
        tmpf.write("hello world")
    with zipfile.ZipFile(zip_file, "w"):
        pass  # create empty archive
    file_utils.add_contents_to_zip(zip_file, tmp_dir2)
    with zipfile.ZipFile(zip_file, "r") as zf:
        with zf.open(os.path.relpath(tmp_file, tmp_dir2), "r") as f:
            self.assertEqual(f.read().decode("ascii"), "hello world")
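# A minimal sketch of what add_contents_to_zip could look like, inferred from the two
# tests above: it appends every file under a directory to an existing zip, archived at
# its path relative to that directory (so tmp_dir2/foo.txt is readable as 'foo.txt').
# This is an illustrative assumption, not the project's actual file_utils code.
import os
import zipfile

def add_contents_to_zip(zip_path, dir_path):
    with zipfile.ZipFile(zip_path, 'a') as zf:
        for root, _dirs, files in os.walk(dir_path):
            for name in files:
                full_path = os.path.join(root, name)
                zf.write(full_path, os.path.relpath(full_path, dir_path))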
def run(self) -> Dict[str, Any]:
    """
    Call the converters
    """
    success = False
    if os.path.isdir(self.source_dir):
        self.files_dir = self.source_dir  # TODO: This can be cleaned up later
    try:
        # if not self.input_zip_file or not os.path.exists(self.input_zip_file):
        #     # No input zip file yet, so we need to download the archive
        #     self.download_archive()
        # # unzip the input archive
        # AppSettings.logger.debug(f"Converter unzipping {self.input_zip_file} to {self.files_dir}")
        # unzip(self.input_zip_file, self.files_dir)

        # convert method called
        AppSettings.logger.debug(f"Converting files from {self.files_dir}…")
        if self.convert():
            # AppSettings.logger.debug(f"Was able to convert {self.resource}")
            # Zip the output dir to the output archive
            # AppSettings.logger.debug(f"Converter adding files in {self.output_dir} to {self.output_zip_file}")
            add_contents_to_zip(self.output_zip_file, self.output_dir)
            # remove_tree(self.output_dir)  # Done in converter.close()

            # Upload the output archive either to cdn_bucket or to a file (no cdn_bucket)
            if self.cdn_file_key:
                AppSettings.logger.info(f"Converter uploading output archive to {self.cdn_file_key} …")
                self.upload_archive()
                AppSettings.logger.debug(f"Uploaded converted files (using '{self.cdn_file_key}').")
            else:
                AppSettings.logger.debug("No converted file upload requested.")
            remove_file(self.output_zip_file)
            success = True
        else:
            self.log.error(f"Resource type '{self.repo_subject}' currently not supported.")
    except Exception as e:
        self.log.error(f"Conversion process ended abnormally: {e}")
        AppSettings.logger.debug(f"Converter failure: {traceback.format_exc()}")

    results = {
        'identifier': self.identifier,
        'success': success and len(self.log.logs['error']) == 0,
        'info': self.log.logs['info'],
        'warnings': self.log.logs['warning'],
        'errors': self.log.logs['error']
    }
    # if self.callback is not None:
    #     self.callback_results = results
    #     self.do_callback(self.callback, self.callback_results)
    # AppSettings.logger.debug(results)
    return results
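# Hedged sketches of the remove_file() and remove_tree() helpers that run() calls or
# mentions above, assuming they are thin "ignore if missing" wrappers around os and
# shutil; the real file_utils versions may differ.
import os
import shutil

def remove_file(file_path):
    # Delete a single file, silently skipping paths that do not exist
    if os.path.isfile(file_path):
        os.remove(file_path)

def remove_tree(dir_path, ignore_errors=True):
    # Delete a directory tree, silently skipping paths that do not exist
    if os.path.isdir(dir_path):
        shutil.rmtree(dir_path, ignore_errors=ignore_errors)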
def handle(event, context):
    # Get vars and data
    env_vars = retrieve(event, 'vars', 'payload')
    api_url = retrieve(env_vars, 'api_url', 'Environment Vars')
    pre_convert_bucket = retrieve(env_vars, 'pre_convert_bucket', 'Environment Vars')
    cdn_bucket = retrieve(env_vars, 'cdn_bucket', 'Environment Vars')
    gogs_url = retrieve(env_vars, 'gogs_url', 'Environment Vars')
    gogs_user_token = retrieve(env_vars, 'gogs_user_token', 'Environment Vars')
    repo_commit = retrieve(event, 'data', 'payload')

    commit_id = repo_commit['after']
    commit = None
    for commit in repo_commit['commits']:
        if commit['id'] == commit_id:
            break
    commit_url = commit['url']
    commit_message = commit['message']

    if gogs_url not in commit_url:
        raise Exception('Repos can only belong to {0} to use this webhook client.'.format(gogs_url))

    repo_name = repo_commit['repository']['name']
    repo_owner = repo_commit['repository']['owner']['username']
    compare_url = repo_commit['compare_url']

    if 'pusher' in repo_commit:
        pusher = repo_commit['pusher']
    else:
        pusher = {'username': commit['author']['username']}
    pusher_username = pusher['username']

    # 1) Download and unzip the repo files
    temp_dir = tempfile.mkdtemp(prefix='repo_')
    download_repo(commit_url, temp_dir)
    repo_dir = os.path.join(temp_dir, repo_name)
    if not os.path.isdir(repo_dir):
        repo_dir = temp_dir

    # 2) Get the manifest file or make one if it doesn't exist based on meta.json, repo_name and file extensions
    manifest_path = os.path.join(repo_dir, 'manifest.json')
    if not os.path.isfile(manifest_path):
        manifest_path = os.path.join(repo_dir, 'project.json')
        if not os.path.isfile(manifest_path):
            manifest_path = None
    meta_path = os.path.join(repo_dir, 'meta.json')
    meta = None
    if os.path.isfile(meta_path):
        meta = MetaData(meta_path)
    manifest = Manifest(file_name=manifest_path, repo_name=repo_name, files_path=repo_dir, meta=meta)

    # determining the repo compiler:
    generator = ''
    if manifest.generator and manifest.generator['name'] and manifest.generator['name'].startswith('ts'):
        generator = 'ts'
    if not generator:
        dirs = sorted(get_subdirs(repo_dir, True))
        if 'content' in dirs:
            repo_dir = os.path.join(repo_dir, 'content')
        elif 'usfm' in dirs:
            repo_dir = os.path.join(repo_dir, 'usfm')
    manifest_path = os.path.join(repo_dir, 'manifest.json')
    write_file(manifest_path, manifest.__dict__)  # Write it back out so it's using the latest manifest format

    input_format = manifest.format
    resource_type = manifest.resource['id']
    if resource_type == 'ulb' or resource_type == 'udb':
        resource_type = 'bible'

    print(generator)
    print(input_format)
    print(manifest.__dict__)

    try:
        compiler_class = str_to_class('preprocessors.{0}{1}{2}Preprocessor'.format(
            generator.capitalize(), resource_type.capitalize(), input_format.capitalize()))
    except AttributeError as e:
        print('Got AE: {0}'.format(e))
        compiler_class = preprocessors.Preprocessor
    print(compiler_class)

    # merge the source files with the template
    output_dir = tempfile.mkdtemp(prefix='output_')
    compiler = compiler_class(manifest, repo_dir, output_dir)
    compiler.run()

    # 3) Zip up the massaged files
    # context.aws_request_id is a unique ID for this lambda call, so using it to not conflict with other requests
    zip_filename = context.aws_request_id + '.zip'
    zip_filepath = os.path.join(tempfile.gettempdir(), zip_filename)
    print('Zipping files from {0} to {1}...'.format(output_dir, zip_filepath))
    add_contents_to_zip(zip_filepath, output_dir)
    if os.path.isfile(manifest_path) and not os.path.isfile(os.path.join(output_dir, 'manifest.json')):
        add_file_to_zip(zip_filepath, manifest_path, 'manifest.json')
    print('finished.')

    # 4) Upload zipped file to the S3 bucket (you may want to do some try/catch and give an error if fails back to Gogs)
    s3_handler = S3Handler(pre_convert_bucket)
    file_key = "preconvert/" + zip_filename
    print('Uploading {0} to {1}/{2}...'.format(zip_filepath, pre_convert_bucket, file_key))
    s3_handler.upload_file(zip_filepath, file_key)
    print('finished.')

    # Send job request to tx-manager
    source_url = 'https://s3-us-west-2.amazonaws.com/{0}/{1}'.format(pre_convert_bucket, file_key)  # we use us-west-2 for our s3 buckets
    tx_manager_job_url = api_url + '/tx/job'
    identifier = "{0}/{1}/{2}".format(repo_owner, repo_name, commit_id[:10])  # The way to know which repo/commit goes to this job request
    if input_format == 'markdown':
        input_format = 'md'
    payload = {
        "identifier": identifier,
        "user_token": gogs_user_token,
        "resource_type": manifest.resource['id'],
        "input_format": input_format,
        "output_format": "html",
        "source": source_url,
        "callback": api_url + '/client/callback'
    }
    headers = {"content-type": "application/json"}

    print('Making request to tx-Manager URL {0} with payload:'.format(tx_manager_job_url))
    print(payload)
    print('...')
    response = requests.post(tx_manager_job_url, json=payload, headers=headers)
    print('finished.')

    # for testing
    print('tx-manager response:')
    print(response)

    if not response:  # a requests.Response is falsy for 4xx/5xx status codes
        raise Exception('Bad request: unable to convert')
    json_data = json.loads(response.text)
    if 'errorMessage' in json_data:
        raise Exception('Bad request: {0}'.format(json_data['errorMessage']))
    if 'job' not in json_data:
        raise Exception('Bad request: tX Manager did not return any info about the job request.')

    build_log_json = json_data['job']
    build_log_json['repo_name'] = repo_name
    build_log_json['repo_owner'] = repo_owner
    build_log_json['commit_id'] = commit_id
    build_log_json['committed_by'] = pusher_username
    build_log_json['commit_url'] = commit_url
    build_log_json['compare_url'] = compare_url
    build_log_json['commit_message'] = commit_message
    if 'errorMessage' in json_data:
        build_log_json['status'] = 'failed'
        build_log_json['message'] = json_data['errorMessage']

    # Upload files to S3:
    # S3 location vars
    cdn_handler = S3Handler(cdn_bucket)
    s3_commit_key = 'u/{0}'.format(identifier)

    # Remove everything in the bucket with the s3_commit_key prefix so old files are removed, if any
    for obj in cdn_handler.get_objects(prefix=s3_commit_key):
        cdn_handler.delete_file(obj.key)

    # Make a build_log.json file with this repo and commit data for later processing, upload to S3
    build_log_file = os.path.join(tempfile.gettempdir(), 'build_log_request.json')
    write_file(build_log_file, build_log_json)
    cdn_handler.upload_file(build_log_file, s3_commit_key + '/build_log.json', 0)

    # Upload the manifest.json file to the cdn_bucket if it exists
    if os.path.isfile(manifest_path):
        cdn_handler.upload_file(manifest_path, s3_commit_key + '/manifest.json', 0)

    # If there was an error, in order to trigger a 400 error in the API Gateway, we need to raise an
    # exception with the returned 'errorMessage' because the API Gateway needs to see 'Bad Request:' in the string
    if 'errorMessage' in json_data:
        raise Exception('Bad Request: {0}'.format(json_data['errorMessage']))

    return build_log_json
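# A minimal sketch of the retrieve() helper the handlers above lean on, assuming it
# simply pulls a required key out of a dict and raises with a readable message naming
# the source when the key is absent; the real helper's wording may differ.
def retrieve(dictionary, key, dict_name='dictionary'):
    """Return dictionary[key], or raise if the key is missing."""
    if isinstance(dictionary, dict) and key in dictionary:
        return dictionary[key]
    raise Exception("'{0}' not found in {1}".format(key, dict_name))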
def handle(event, context):
    try:
        # Get vars and data
        env_vars = retrieve(event, 'vars', 'payload')
        api_url = retrieve(env_vars, 'api_url', 'Environment Vars')
        pre_convert_bucket = retrieve(env_vars, 'pre_convert_bucket', 'Environment Vars')
        cdn_bucket = retrieve(env_vars, 'cdn_bucket', 'Environment Vars')
        gogs_url = retrieve(env_vars, 'gogs_url', 'Environment Vars')
        gogs_user_token = retrieve(env_vars, 'gogs_user_token', 'Environment Vars')
        repo_commit = retrieve(event, 'data', 'payload')

        commit_id = repo_commit['after']
        commit = None
        for commit in repo_commit['commits']:
            if commit['id'] == commit_id:
                break
        commit_id = commit_id[:10]  # Only use the short form
        commit_url = commit['url']
        commit_message = commit['message']

        if gogs_url not in commit_url:
            raise Exception('Repos can only belong to {0} to use this webhook client.'.format(gogs_url))

        repo_name = repo_commit['repository']['name']
        repo_owner = repo_commit['repository']['owner']['username']
        compare_url = repo_commit['compare_url']

        if 'pusher' in repo_commit:
            pusher = repo_commit['pusher']
        else:
            pusher = {'username': commit['author']['username']}
        pusher_username = pusher['username']

        # 1) Download and unzip the repo files
        temp_dir = tempfile.mkdtemp(prefix='repo_')
        download_repo(commit_url, temp_dir)
        repo_dir = os.path.join(temp_dir, repo_name)
        if not os.path.isdir(repo_dir):
            repo_dir = temp_dir

        # 2) Get the manifest file or make one if it doesn't exist based on meta.json, repo_name and file extensions
        manifest_path = os.path.join(repo_dir, 'manifest.json')
        if not os.path.isfile(manifest_path):
            manifest_path = os.path.join(repo_dir, 'project.json')
            if not os.path.isfile(manifest_path):
                manifest_path = None
        meta_path = os.path.join(repo_dir, 'meta.json')
        meta = None
        if os.path.isfile(meta_path):
            meta = MetaData(meta_path)
        manifest = Manifest(file_name=manifest_path, repo_name=repo_name, files_path=repo_dir, meta=meta)

        # determining the repo compiler:
        generator = ''
        if manifest.generator and manifest.generator['name'] and manifest.generator['name'].startswith('ts'):
            generator = 'ts'
        if not generator:
            dirs = sorted(get_subdirs(repo_dir, True))
            if 'content' in dirs:
                repo_dir = os.path.join(repo_dir, 'content')
            elif 'usfm' in dirs:
                repo_dir = os.path.join(repo_dir, 'usfm')
        manifest_path = os.path.join(repo_dir, 'manifest.json')
        write_file(manifest_path, manifest.__dict__)  # Write it back out so it's using the latest manifest format

        input_format = manifest.format
        resource_type = manifest.resource['id']
        if resource_type == 'ulb' or resource_type == 'udb':
            resource_type = 'bible'

        print(generator)
        print(input_format)
        print(manifest.__dict__)

        try:
            compiler_class = str_to_class('preprocessors.{0}{1}{2}Preprocessor'.format(
                generator.capitalize(), resource_type.capitalize(), input_format.capitalize()))
        except AttributeError as e:
            print('Got AE: {0}'.format(e))
            compiler_class = preprocessors.Preprocessor
        print(compiler_class)

        # merge the source files with the template
        output_dir = tempfile.mkdtemp(prefix='output_')
        compiler = compiler_class(manifest, repo_dir, output_dir)
        compiler.run()

        # 3) Zip up the massaged files
        # context.aws_request_id is a unique ID for this lambda call, so using it to not conflict with other requests
        zip_filename = context.aws_request_id + '.zip'
        zip_filepath = os.path.join(tempfile.gettempdir(), zip_filename)
        print('Zipping files from {0} to {1}...'.format(output_dir, zip_filepath))
        add_contents_to_zip(zip_filepath, output_dir)
        if os.path.isfile(manifest_path) and not os.path.isfile(os.path.join(output_dir, 'manifest.json')):
            add_file_to_zip(zip_filepath, manifest_path, 'manifest.json')
        print('finished.')

        # 4) Upload zipped file to the S3 bucket (you may want to do some try/catch and give an error if fails back to Gogs)
        s3_handler = S3Handler(pre_convert_bucket)
        file_key = "preconvert/" + zip_filename
        print('Uploading {0} to {1}/{2}...'.format(zip_filepath, pre_convert_bucket, file_key))
        s3_handler.upload_file(zip_filepath, file_key)
        print('finished.')

        # Send job request to tx-manager
        source_url = 'https://s3-us-west-2.amazonaws.com/{0}/{1}'.format(pre_convert_bucket, file_key)  # we use us-west-2 for our s3 buckets
        callback_url = api_url + '/client/callback'
        tx_manager_job_url = api_url + '/tx/job'
        identifier = "{0}/{1}/{2}".format(repo_owner, repo_name, commit_id)  # The way to know which repo/commit goes to this job request
        if input_format == 'markdown':
            input_format = 'md'
        payload = {
            "identifier": identifier,
            "user_token": gogs_user_token,
            "resource_type": manifest.resource['id'],
            "input_format": input_format,
            "output_format": "html",
            "source": source_url,
            "callback": callback_url
        }
        headers = {"content-type": "application/json"}

        print('Making request to tx-Manager URL {0} with payload:'.format(tx_manager_job_url))
        print(payload)
        response = requests.post(tx_manager_job_url, json=payload, headers=headers)
        print('finished.')

        # for testing
        print('tx-manager response:')
        print(response)
        print(response.status_code)

        # Fake job in case tx-manager returns an error, can still build the build_log.json
        job = {
            'job_id': None,
            'identifier': identifier,
            'resource_type': manifest.resource['id'],
            'input_format': input_format,
            'output_format': 'html',
            'source': source_url,
            'callback': callback_url,
            'message': 'Conversion started...',
            'status': 'requested',
            'success': None,
            'created_at': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
            'log': [],
            'warnings': [],
            'errors': []
        }

        if response.status_code != requests.codes.ok:
            job['status'] = 'failed'
            job['success'] = False
            job['message'] = 'Failed to convert!'
            error = ''
            if response.text:
                try:
                    json_data = json.loads(response.text)
                    if 'errorMessage' in json_data:
                        error = json_data['errorMessage']
                        if error.startswith('Bad Request: '):
                            error = error[len('Bad Request: '):]
                except Exception:
                    pass
            job['errors'].append(error)
        else:
            json_data = json.loads(response.text)
            if 'job' not in json_data:
                job['status'] = 'failed'
                job['success'] = False
                job['message'] = 'Failed to convert'
                job['errors'].append('tX Manager did not return any info about the job request.')
            else:
                job = json_data['job']

        cdn_handler = S3Handler(cdn_bucket)

        # Download the project.json file for this repo (create it if doesn't exist) and update it
        project_json_key = 'u/{0}/{1}/project.json'.format(repo_owner, repo_name)
        project_json = cdn_handler.get_json(project_json_key)
        project_json['user'] = repo_owner
        project_json['repo'] = repo_name
        project_json['repo_url'] = 'https://git.door43.org/{0}/{1}'.format(repo_owner, repo_name)
        commit = {
            'id': commit_id,
            'created_at': job['created_at'],
            'status': job['status'],
            'success': job['success'],
            'started_at': None,
            'ended_at': None
        }
        if 'commits' not in project_json:
            project_json['commits'] = []
        commits = []
        for c in project_json['commits']:
            if c['id'] != commit_id:
                commits.append(c)
        commits.append(commit)
        project_json['commits'] = commits
        project_file = os.path.join(tempfile.gettempdir(), 'project.json')
        write_file(project_file, project_json)
        cdn_handler.upload_file(project_file, project_json_key, 0)

        # Compile data for build_log.json
        build_log_json = job
        build_log_json['repo_name'] = repo_name
        build_log_json['repo_owner'] = repo_owner
        build_log_json['commit_id'] = commit_id
        build_log_json['committed_by'] = pusher_username
        build_log_json['commit_url'] = commit_url
        build_log_json['compare_url'] = compare_url
        build_log_json['commit_message'] = commit_message

        # Upload build_log.json and manifest.json to S3:
        s3_commit_key = 'u/{0}'.format(identifier)
        for obj in cdn_handler.get_objects(prefix=s3_commit_key):
            cdn_handler.delete_file(obj.key)
        build_log_file = os.path.join(tempfile.gettempdir(), 'build_log.json')
        write_file(build_log_file, build_log_json)
        cdn_handler.upload_file(build_log_file, s3_commit_key + '/build_log.json', 0)
        cdn_handler.upload_file(manifest_path, s3_commit_key + '/manifest.json', 0)

        if len(job['errors']) > 0:
            raise Exception('; '.join(job['errors']))
        else:
            return build_log_json
    except Exception as e:
        raise Exception('Bad Request: {0}'.format(e))
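# A hedged sketch of write_file() as the handlers above appear to use it: it is passed
# both dicts (manifest.__dict__, build_log_json, project_json) and file paths, so it
# presumably JSON-encodes non-string payloads before writing. The json.dumps options
# here are assumptions, not the project's actual implementation.
import codecs
import json

def write_file(file_path, data):
    # JSON-encode dicts/lists so build_log.json and project.json land as valid JSON
    if not isinstance(data, str):
        data = json.dumps(data, sort_keys=True)
    with codecs.open(file_path, 'w', encoding='utf-8') as out_file:
        out_file.write(data)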
def handle(event, context):
    log = []
    errors = []
    warnings = []

    if 'data' not in event:
        raise Exception('"data" was not in payload')
    data = event['data']

    if 'job' not in data:
        raise Exception('"job" was not in payload')
    job = data['job']

    if 'source' not in job:
        raise Exception('"source" was not in "job"')
    source = job['source']

    if 'resource_type' not in job:
        raise Exception('"resource_type" was not in "job"')
    resource = job['resource_type']

    if 'cdn_bucket' not in job:
        raise Exception('"cdn_bucket" was not in "job"')
    cdn_bucket = job['cdn_bucket']

    if 'cdn_file' not in job:
        raise Exception('"cdn_file" was not in "job"')
    cdn_file = job['cdn_file']

    print('source: ' + source)
    print('cdn_bucket: ' + cdn_bucket)
    print('cdn_file: ' + cdn_file)

    options = {'line_spacing': '120%'}
    if 'options' in job:
        options.update(job['options'])

    output_dir = os.path.join(tempfile.gettempdir(), context.aws_request_id)
    success = False
    try:
        if resource == 'obs':
            converter = transform_obs.TransformOBS(source, output_dir, options)
            try:
                converter.run()
            except Exception as e:
                error_message(errors, str(e) or 'Conversion process failed to run.')
            finally:
                log.extend(converter.log)
                errors.extend(converter.errors)
                warnings.extend(converter.warnings)
        # --- Add other resources here when implemented ---
        else:
            raise Exception('Resource "{0}" not currently supported'.format(resource))

        zip_file = os.path.join(tempfile.gettempdir(), context.aws_request_id + '.zip')
        add_contents_to_zip(zip_file, output_dir)
        log_message(log, "Uploading {0} to {1}/{2}".format(os.path.basename(zip_file), cdn_bucket, cdn_file))
        cdn_handler = S3Handler(cdn_bucket)
        cdn_handler.upload_file(zip_file, cdn_file)
        log_message(log, "Upload was successful.")
        success = True
    except Exception as e:
        error_message(errors, str(e) or 'Conversion process ended abnormally')

    return {
        'log': log,
        'errors': errors,
        'warnings': warnings,
        'success': success
    }
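# Minimal sketches of the log_message() and error_message() helpers the converter
# handler above relies on, assuming they just append to the given list and echo to
# stdout for the Lambda logs; the real helpers may format messages differently.
def log_message(log, message):
    print(message)
    log.append(message)

def error_message(errors, message):
    print('ERROR: ' + message)
    errors.append(message)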