def __parse_pull_request(self, payload):
    """
    Reads a pull request webhook payload and stores its details on the handler.

    The repository owner and name, a newly created temporary working
    directory (plus the paths of the repository archive and of its extracted
    folder inside that directory), the merge timestamp, the commit url and a
    shortened commit id are all written to instance attributes.

    :param payload: the webhook payload; its `pull_request` and `repository` entries are read
    :return: nothing is returned, the results are stored on the instance
    """
    pr_info = self.retrieve(payload, 'pull_request', 'payload')

    repo_info = payload['repository']
    self.repo_owner = repo_info['owner']['username']
    self.repo_name = repo_info['name']

    # each run works inside its own scratch directory named after the repo
    self.temp_dir = tempfile.mkdtemp(suffix='', prefix=self.repo_name, dir=None)
    archive_name = self.repo_name + '.zip'
    self.repo_file = os.path.join(self.temp_dir, archive_name)
    # TRICKY: gogs gives a lower case name to the folder in the zip archive
    extracted_name = self.repo_name.lower()
    self.repo_dir = os.path.join(self.temp_dir, extracted_name)

    merge_sha = self.retrieve(pr_info, 'merge_commit_sha', 'pull_request')
    merged_at = self.retrieve(pr_info, 'merged_at', 'pull_request')
    self.timestamp = str_to_timestamp(merged_at)

    repo_section = self.retrieve(payload, 'repository', 'payload')
    base_url = self.retrieve(repo_section, 'html_url', 'repository').rstrip('/')
    self.commit_url = '{}/commit/{}'.format(base_url, merge_sha)

    # the short commit id is the first 10 characters of the sha, when there is one
    self.commit_id = merge_sha[:10] if merge_sha else None
def __parse_push(self, payload):
    """
    Reads a regular push webhook payload and stores its details on the handler.

    The repository owner and name, a newly created temporary working
    directory (plus the paths of the repository archive and of its extracted
    folder inside that directory), the commit url, the commit timestamp and
    a shortened commit id are all written to instance attributes.

    The commit whose `id` equals the payload's `after` value supplies the
    url and timestamp. If no listed commit matches, the last commit in the
    list is used instead.

    :param payload: the push webhook payload; its `repository`, `after` and `commits` entries are read
    :return: nothing is returned, the results are stored on the instance
    :raises Exception: if the payload does not list any commits
    """
    repo_info = payload['repository']
    head_sha = payload['after']
    commits = payload['commits']

    # Validate before touching the disk so that a bad payload does not leave
    # an orphaned temp directory behind and fails with a readable message.
    if not commits:
        raise Exception('The push payload does not contain any commits')

    # prefer the commit matching `after`; otherwise fall back to the last one
    commit = next((c for c in commits if c['id'] == head_sha), commits[-1])

    self.repo_owner = repo_info['owner']['username']
    self.repo_name = repo_info['name']
    self.temp_dir = tempfile.mkdtemp('', self.repo_name, None)
    self.repo_file = os.path.join(self.temp_dir, self.repo_name + '.zip')
    # TRICKY: gogs gives a lower case name to the folder in the zip archive
    self.repo_dir = os.path.join(self.temp_dir, self.repo_name.lower())

    self.commit_url = commit['url']
    self.timestamp = str_to_timestamp(commit['timestamp'])
    # only the first 10 characters of the sha are kept as the commit id
    self.commit_id = head_sha[:10]
def process_format(self, item, dublin_core, project, format):
    """
    Performs the signing on the format object.
    Files outside of the cdn will not be signed.

    The format dictionary is updated in place (`signature`, and where
    applicable `url`, `modified`, `size`, `length` and `format`).

    :param item: the catalog item being signed; its `repo_name` and `commit_id` are read
    :param dublin_core: the resource's dublin core; read when building html urls
    :param project: this may be None. When given, its identifier names the html file
    :param format: the format dictionary to sign
    :return: (already_signed, newly_signed)
        (True, False)  the format already carries a signature
        (True, True)   the url is outside of the cdn; the signature is set to ''
        (False, True)  the format was signed, or a manually uploaded signature was used
        (False, False) the format could not be signed
    """
    if 'signature' in format and format['signature']:
        return (True, False)
    else:
        self.logger.debug('Signing {}'.format(format['url']))

    def without_prefix(value, prefix):
        # NOTE: str.lstrip() removes any leading characters found in its
        # argument (it is a character set, not a prefix), which can eat
        # into the bucket name itself. Only an exact leading match is removed.
        if prefix and value.startswith(prefix):
            return value[len(prefix):]
        return value

    base_name = os.path.basename(format['url'])
    file_to_sign = os.path.join(self.temp_dir, base_name)

    # extract cdn key from url
    url_info = urlparse.urlparse(format['url'])
    src_key = url_info.path.lstrip('/')
    sig_key = '{}.sig'.format(src_key)

    build_rules = get_build_rules(format, 'signing')
    stage_prefix = self.stage_prefix()

    # TRICKY: allow dev environments to download from prod environment
    # RS: I added the s3 bucket here because it isn't yet accessible via urls
    valid_hosts = [
        self.cdn_bucket,
        self.cdn_bucket + ".s3.us-east-2.amazonaws.com"
    ]
    if stage_prefix:
        if not self.cdn_bucket.startswith(stage_prefix):
            self.logger.warning(
                'Expected `cdn_bucket` to begin with the stage prefix ({}) but found {}'
                .format(stage_prefix, self.cdn_bucket))
        prod_cdn_bucket = without_prefix(self.cdn_bucket, stage_prefix)
        valid_hosts.append(prod_cdn_bucket)

    # TRICKY: some html content is on the api
    if 'html_format' in build_rules:
        valid_hosts.append(self.api_bucket)
        prod_api_bucket = without_prefix(self.api_bucket, stage_prefix)
        valid_hosts.append(prod_api_bucket)

    # verify url is on the cdn
    if url_info.hostname not in valid_hosts:
        # TODO: external media should be imported if it's not too big
        # This allows media to be hosted on third party servers
        format['signature'] = ''
        self.logger.warning(
            'cannot sign files outside of the cdn: {}'.format(
                format['url']))
        self.logger.warning('valid hosts are: {}'.format(
            ", ".join(valid_hosts)))
        return (True, True)

    try:
        headers = self.url_headers(format['url'])
    except Exception as e:
        self.report_error('Could not read headers from {}: {}'.format(
            format['url'], e))
        return (False, False)

    # skip files that are too large
    size = int(headers.get('content-length', 0))
    if size > SigningHandler.max_file_size:
        sig_url = '{}.sig'.format(format['url'])
        if not self._safe_url_exists(sig_url):
            # wait for signature to be manually uploaded
            self.report_error('File is too large to sign {}'.format(
                format['url']))
            return (False, False)

        # finish with manually uploaded signature
        format['size'] = size
        if not format['modified']:
            format['modified'] = str_to_timestamp(
                datetime.datetime.now().isoformat())
        format['signature'] = sig_url
        return (False, True)

    # download file
    try:
        if 'sign_given_url' in build_rules or 'html_format' in build_rules:
            # report error if response is 400+
            if headers.status >= 400:
                self.report_error('Resource not available at {}'.format(
                    format['url']))
                return (False, False)

            self.download_file(format['url'], file_to_sign)
        else:
            # TRICKY: most files to be signed are stored in a temp directory
            src_temp_key = 'temp/{}/{}/{}'.format(item['repo_name'],
                                                  item['commit_id'],
                                                  src_key)
            self.cdn_handler.download_file(src_temp_key, file_to_sign)
    except Exception as e:
        self.report_error(
            'The file "{}" could not be downloaded: {}'.format(
                base_name, e))
        return (False, False)

    # strip print script from html
    if 'html_format' in build_rules:
        self.logger.debug('Removing print script from {} html'.format(
            item['repo_name']))
        self._strip_print_script(file_to_sign)

    # sign file, then make sure the fresh signature actually verifies
    sig_file = self.signer.sign_file(file_to_sign)
    try:
        self.signer.verify_signature(file_to_sign, sig_file)
    except RuntimeError:
        if self.logger:
            self.logger.warning(
                'The signature was not successfully verified.')
        return (False, False)

    # TRICKY: re-format html urls
    if 'html_format' in build_rules:
        html_name = dublin_core['identifier']
        if project:
            html_name = project['identifier']
        src_key = '{}/{}/v{}/media/html/{}.html'.format(
            dublin_core['language']['identifier'],
            dublin_core['identifier'], self.api_version, html_name)
        sig_key = '{}.sig'.format(src_key)
        format['url'] = '{}/{}'.format(self.cdn_url, src_key)

    # upload files
    if 'sign_given_url' not in build_rules or 'html_format' in build_rules:
        # TRICKY: upload temp files to production
        self.cdn_handler.upload_file(file_to_sign, src_key)
    self.cdn_handler.upload_file(sig_file, sig_key)

    # add the url of the sig file to the format
    format['signature'] = '{}.sig'.format(format['url'])

    # read modified date from file
    stats = os.stat(file_to_sign)
    if not format['modified']:
        modified = headers.get('last-modified')
        if modified:
            # TRICKY: http header gives an odd date format
            date = datetime.datetime.strptime(modified,
                                              '%a, %d %b %Y %H:%M:%S %Z')
            modified = str_to_timestamp(date.isoformat())
        else:
            # no header available, so use the downloaded file's mtime
            modified = unix_to_timestamp(stats.st_mtime)
        format['modified'] = modified
    format['size'] = stats.st_size

    # retrieve playback time from multimedia files
    _, ext = os.path.splitext(file_to_sign)
    if ext == '.mp3':
        audio = MP3(file_to_sign)
        format['length'] = audio.info.length
    elif ext == '.mp4':
        video = MP4(file_to_sign)
        format['length'] = video.info.length

    # add file format if missing
    if 'format' not in format or not format['format']:
        try:
            mime = ext_to_mime(ext)
            format['format'] = mime
        except Exception as e:
            if self.logger:
                self.logger.error(e.message)

    # clean up disk space
    os.remove(file_to_sign)

    return (False, True)
def _build_rc(self):
    """
    Builds a Resource Container following the RC0.2 spec

    Loads and checks the repository's manifest.yaml, merges in the media
    formats from media.yaml when that file exists, and adds a zip format
    for the whole resource. For usfm bundles every project also receives
    its own usfm format and upload entry.

    :return: a dictionary with the keys `repo_name`, `commit_id`,
        `language`, `timestamp`, `added_at`, `package` (the manifest as a
        json string), `signed`, `dirty` and `uploads` (a list of
        {'key', 'path'} dictionaries)
    :raises Exception: if manifest.yaml is missing, if the manifest cannot
        be loaded or fails the consistency check, or if media.yaml cannot
        be loaded
    """
    manifest_path = os.path.join(self.repo_dir, 'manifest.yaml')
    if not os.path.isfile(manifest_path):
        raise Exception(
            'Repository {0} does not have a manifest.yaml file'.format(
                self.repo_name))
    try:
        manifest = WebhookHandler.load_yaml_file(manifest_path)
    except Exception as e:
        raise Exception('Bad Manifest: {0}'.format(e))

    try:
        ConsistencyChecker.check_manifest(manifest)
    except Exception as e:
        raise Exception('Bad Manifest: {0}'.format(e))

    # alias only; every write below still lands in manifest['dublin_core']
    dublin_core = manifest['dublin_core']

    # identifiers must be lowercase
    dublin_core['identifier'] = self.sanitize_identifier(
        dublin_core['identifier'])
    # resource version must be string
    dublin_core['version'] = '{}'.format(dublin_core['version'])

    # build media formats
    media_path = os.path.join(self.repo_dir, 'media.yaml')
    resource_formats = []
    project_formats = {}
    if os.path.isfile(media_path):
        try:
            media = WebhookHandler.load_yaml_file(media_path)
        except Exception as e:
            raise Exception('Bad Media: {0}'.format(e))
        project_chapters = self._listChapters(self.repo_dir, manifest)
        try:
            resource_formats, project_formats = parse_media(
                media=media,
                content_version=dublin_core['version'],
                project_chapters=project_chapters)
        except Exception as e:
            # a media parsing failure is reported but does not abort the
            # build; the resource is then published without media formats
            self.report_error('Failed to parse media in {}. {}'.format(
                self.repo_name, e.message))

    stats = os.stat(self.repo_file)

    # normalize dates
    try:
        dublin_core['modified'] = str_to_timestamp(dublin_core['modified'])
    except Exception as e:
        self.logger.warning('Invalid datetime detected: {}'.format(
            e.message))
    try:
        dublin_core['issued'] = str_to_timestamp(dublin_core['issued'])
    except Exception as e:
        self.logger.warning('Invalid datetime detected: {}'.format(
            e.message))

    # TRICKY: single-project RCs get named after the project to avoid conflicts with multi-project RCs.
    if len(manifest['projects']) == 1:
        zip_name = manifest['projects'][0]['identifier'].lower()
    else:
        zip_name = dublin_core['identifier']

    language_id = dublin_core['language']['identifier']
    # only the last dash-separated part of the identifier is used in cdn keys
    resource_id = dublin_core['identifier'].split('-')[-1]

    resource_key = '{}/{}/v{}/{}.zip'.format(
        language_id, resource_id, dublin_core['version'], zip_name)
    url = '{}/{}'.format(self.cdn_url, resource_key)

    file_info = {
        'size': stats.st_size,
        'modified': self.timestamp,
        'format':
        'application/zip; type={0} content={1} conformsto={2}'.format(
            dublin_core['type'], dublin_core['format'],
            dublin_core['conformsto']),
        'url': url,
        'signature': ""
    }
    manifest['formats'] = [file_info]

    uploads = [{
        'key': self.make_upload_key(resource_key),
        'path': self.repo_file
    }]

    # split usfm bundles
    if dublin_core['type'] == 'bundle' and dublin_core['format'] == 'text/usfm':
        # The resource's modified date is the same for every project, so it
        # is resolved once here instead of once per project.
        try:
            resource_mtime = str_to_timestamp(dublin_core['modified'])
        except Exception as e:
            self.logger.warning('Invalid datetime detected: {}'.format(
                e.message))
            resource_mtime = dublin_core['modified']

        for project in manifest['projects']:
            pid = self.sanitize_identifier(project['identifier'])
            if 'formats' not in project:
                project['formats'] = []
            project_key = '{}/{}/v{}/{}.usfm'.format(
                language_id, resource_id, dublin_core['version'], pid)
            project_url = '{}/{}'.format(self.cdn_url, project_key)
            # Removes every leading backslash, dot and slash so that the
            # project path is always resolved inside the repository folder.
            p_file_path = os.path.join(self.repo_dir,
                                       project['path'].lstrip('\\./'))
            p_stats = os.stat(p_file_path)
            project['formats'].append({
                'format': 'text/usfm',
                'modified': resource_mtime,
                'signature': '',
                'size': p_stats.st_size,
                'url': project_url
            })
            uploads.append({
                'key': self.make_upload_key(project_key),
                'path': p_file_path
            })

    # add media to projects
    for project in manifest['projects']:
        pid = self.sanitize_identifier(project['identifier'])
        if pid in project_formats:
            if 'formats' not in project:
                project['formats'] = []
            project['formats'] = project['formats'] + project_formats[pid]

    # add media to resource
    manifest['formats'] = manifest['formats'] + resource_formats

    # NOTE: html formats are intentionally not injected here.
    # TRICKY: the html URLS are only available in prod. The disabled logic
    # appended a 'text/html' format with the 'signing.html_format' build
    # rule to each project whose html url existed.

    return {
        'repo_name': self.repo_name,
        'commit_id': self.commit_id,
        'language': language_id,
        'timestamp': self.timestamp,
        'added_at': arrow.utcnow().isoformat(),
        'package': json.dumps(manifest, sort_keys=True),
        'signed': False,
        'dirty': False,
        'uploads': uploads
    }