def _fetch_entries():
    tree = self.repo.get_root_tree(commit)
    for entry_wrapper in self.repo.iter_tree(tree):
        entry = entry_wrapper.tree_entry
        path = force_text(entry_wrapper.path)
        blob = entry_wrapper.blob

        sha_hash = (
            get_sha256(io.BytesIO(memoryview(blob)))
            if not entry.type == 'tree' else '')

        commit_tzinfo = FixedOffset(commit.commit_time_offset)
        commit_time = datetime.fromtimestamp(
            float(commit.commit_time), commit_tzinfo)

        mimetype, entry_mime_category = self.get_entry_mime_type(
            entry, blob)

        result[path] = {
            'depth': path.count(os.sep),
            'filename': force_text(entry.name),
            'sha256': sha_hash,
            'mime_category': entry_mime_category,
            'mimetype': mimetype,
            'path': path,
            'size': blob.size if blob is not None else None,
            'modified': commit_time,
        }
    return result

def _get_files(self, locale=None):
    result = OrderedDict()

    for path in get_all_files(self.dest):
        path = force_text(path, errors='replace')
        filename = os.path.basename(path)
        short = path[len(self.dest) + 1:]
        mime, encoding = mimetypes.guess_type(filename)
        directory = os.path.isdir(path)

        if not directory:
            with open(path, 'rb') as fobj:
                sha256 = get_sha256(fobj)
        else:
            sha256 = ''

        result[short] = {
            'id': self.file.id,
            'binary': self._is_binary(mime, path),
            'depth': short.count(os.sep),
            'directory': directory,
            'filename': filename,
            'full': path,
            'sha256': sha256,
            'mimetype': mime or 'application/octet-stream',
            'syntax': self.get_syntax(filename),
            'modified': os.stat(path)[stat.ST_MTIME],
            'short': short,
            'size': os.stat(path)[stat.ST_SIZE],
            'truncated': self.truncate(filename),
            'version': self.file.version.version,
        }

    return result

def repack_fileupload(upload_pk):
    log.info('Starting task to repackage FileUpload %s', upload_pk)
    upload = FileUpload.objects.get(pk=upload_pk)
    # When a FileUpload is created and a file added to it, if it's a xpi/zip,
    # it should be moved to upload.path, and it should have a .xpi extension,
    # so we only need to care about that extension here.
    # We don't trust upload.name: it's the original filename as used by the
    # developer, so it could be something else.
    if upload.path.endswith('.xpi'):
        try:
            tempdir = extract_zip(upload.path)
        except Exception:
            # Something bad happened, maybe we couldn't parse the zip file.
            # This task should have an on_error attached when called by
            # Validator(), so we can just raise and the developer will get a
            # generic error message.
            log.exception('Could not extract upload %s for repack.', upload_pk)
            raise
        log.info('Zip from upload %s extracted, repackaging', upload_pk)
        file_ = tempfile.NamedTemporaryFile(suffix='.zip', delete=False)
        shutil.make_archive(os.path.splitext(file_.name)[0], 'zip', tempdir)
        with open(file_.name, 'rb') as f:
            upload.hash = 'sha256:%s' % get_sha256(f)
        log.info('Zip from upload %s repackaged, moving file back', upload_pk)
        move_stored_file(file_.name, upload.path)
        upload.save()
    else:
        log.info('Not repackaging upload %s, it is not a xpi file.', upload_pk)

def _get_files(self, locale=None):
    result = OrderedDict()

    for path in get_all_files(self.dest):
        filename = force_text(os.path.basename(path), errors='replace')
        short = force_text(path[len(self.dest) + 1:], errors='replace')
        mime, encoding = mimetypes.guess_type(filename)
        directory = os.path.isdir(path)

        result[short] = {
            'id': self.file.id,
            'binary': self._is_binary(mime, path),
            'depth': short.count(os.sep),
            'directory': directory,
            'filename': filename,
            'full': path,
            'sha256': get_sha256(path) if not directory else '',
            'mimetype': mime or 'application/octet-stream',
            'syntax': self.get_syntax(filename),
            'modified': os.stat(path)[stat.ST_MTIME],
            'short': short,
            'size': os.stat(path)[stat.ST_SIZE],
            'truncated': self.truncate(filename),
            'version': self.file.version.version,
        }

    return result

def _fetch_entries():
    for entry_wrapper in self.repo.iter_tree(commit.tree):
        entry = entry_wrapper.tree_entry
        path = force_text(entry_wrapper.path)
        blob = entry_wrapper.blob

        is_directory = entry.type == 'tree'
        mime, encoding = mimetypes.guess_type(entry.name)
        is_binary = (
            self.is_binary(path, mime, blob)
            if not is_directory else False)
        sha_hash = (
            get_sha256(io.BytesIO(memoryview(blob)))
            if not is_directory else '')

        commit_tzinfo = FixedOffset(commit.commit_time_offset)
        commit_time = datetime.fromtimestamp(
            float(commit.commit_time), commit_tzinfo)

        result[path] = {
            'binary': is_binary,
            'depth': path.count(os.sep),
            'directory': is_directory,
            'filename': force_text(entry.name),
            'sha256': sha_hash,
            'mimetype': mime or 'application/octet-stream',
            'path': path,
            'size': blob.size if blob is not None else None,
            'modified': commit_time,
        }
    return result

def _calculate_hash():
    try:
        blob_or_tree = tree[selected_file]
    except KeyError:
        return None

    if blob_or_tree.type == pygit2.GIT_OBJ_TREE:
        return None

    blob = self.git_repo[blob_or_tree.oid]
    return get_sha256(io.BytesIO(memoryview(blob)))

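# The snippet above assumes a pygit2 `tree`, a `selected_file` path and a
# `self.git_repo` repository already in scope. As a rough, self-contained
# sketch of the same idea (hashing one committed file straight from git),
# something like the following should work; `repo_path` and `file_path` are
# placeholders, and attribute names such as `.type` and `.oid` follow the
# pygit2 version used by the snippet above and may differ in newer releases.
import io

import pygit2


def sha256_of_committed_file(repo_path, file_path):
    repo = pygit2.Repository(repo_path)
    commit = repo.revparse_single('HEAD')
    try:
        entry = commit.tree[file_path]
    except KeyError:
        # The path does not exist in this commit.
        return None
    if entry.type == pygit2.GIT_OBJ_TREE:
        # Directories have no content to hash.
        return None
    blob = repo[entry.oid]
    # pygit2 blobs expose the buffer protocol, so memoryview() hands the raw
    # bytes to the hashing helper without an extra copy.
    return get_sha256(io.BytesIO(memoryview(blob)))
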
def repack_fileupload(results, upload_pk):
    log.info('Starting task to repackage FileUpload %s', upload_pk)
    upload = FileUpload.objects.get(pk=upload_pk)
    # When a FileUpload is created and a file added to it, if it's a xpi/zip,
    # it should be moved to upload.path, and it should have a .xpi extension,
    # so we only need to care about that extension here.
    # We don't trust upload.name: it's the original filename as used by the
    # developer, so it could be something else.
    if upload.path.endswith('.xpi'):
        timer = StopWatch('files.tasks.repack_fileupload.')
        timer.start()
        # tempdir must *not* be on TMP_PATH, we want local fs instead. It will
        # be deleted automatically once we exit the context manager.
        with tempfile.TemporaryDirectory(
                prefix='repack_fileupload_extract') as tempdir:
            try:
                extract_zip(upload.path, tempdir=tempdir)
            except Exception as exc:
                # Something bad happened, maybe we couldn't parse the zip
                # file. @validation_task should ensure the exception is caught
                # and transformed into a generic error message for the
                # developer, so we just log it and re-raise.
                log.exception('Could not extract upload %s for repack.',
                              upload_pk, exc_info=exc)
                raise
            timer.log_interval('1.extracted')
            log.info('Zip from upload %s extracted, repackaging', upload_pk)
            # We'll move the file to its final location below with
            # move_stored_file(), so don't let tempfile delete it.
            file_ = tempfile.NamedTemporaryFile(suffix='.zip', delete=False)
            shutil.make_archive(
                os.path.splitext(file_.name)[0], 'zip', tempdir)
            with open(file_.name, 'rb') as f:
                upload.hash = 'sha256:%s' % get_sha256(f)
        timer.log_interval('2.repackaged')
        log.info('Zip from upload %s repackaged, moving file back', upload_pk)
        move_stored_file(file_.name, upload.path)
        timer.log_interval('3.moved')
        upload.save()
        timer.log_interval('4.end')
    else:
        log.info('Not repackaging upload %s, it is not a xpi file.', upload_pk)
    return results

def _get_files(self, locale=None): """We need the `locale` parameter for the memoization. The `@memoize` decorator uses the prefix *and the parameters* to come up with a memoize key. We thus add a (seemingly useless) `locale` parameter. Otherwise, we would just always have the urls for the files with the locale from the first person checking them. """ result = OrderedDict() for path in get_all_files(self.dest): filename = force_text(os.path.basename(path), errors='replace') short = force_text(path[len(self.dest) + 1:], errors='replace') mime, encoding = mimetypes.guess_type(filename) directory = os.path.isdir(path) result[short] = { 'binary': self._is_binary(mime, path), 'depth': short.count(os.sep), 'directory': directory, 'filename': filename, 'full': path, 'sha256': get_sha256(path) if not directory else '', 'mimetype': mime or 'application/octet-stream', 'syntax': self.get_syntax(filename), 'modified': os.stat(path)[stat.ST_MTIME], 'short': short, 'size': os.stat(path)[stat.ST_SIZE], 'truncated': self.truncate(filename), 'url': reverse('files.list', args=[self.file.id, 'file', short]), 'url_serve': reverse('files.redirect', args=[self.file.id, short]), 'version': self.file.version.version, } return result
def _get_files(self, locale=None): """We need the `locale` parameter for the memoization. The `@memoize` decorator uses the prefix *and the parameters* to come up with a memoize key. We thus add a (seemingly useless) `locale` parameter. Otherwise, we would just always have the urls for the files with the locale from the first person checking them. """ result = OrderedDict() for path in get_all_files(self.dest): filename = force_text(os.path.basename(path), errors='replace') short = force_text(path[len(self.dest) + 1:], errors='replace') mime, encoding = mimetypes.guess_type(filename) directory = os.path.isdir(path) result[short] = { 'binary': self._is_binary(mime, path), 'depth': short.count(os.sep), 'directory': directory, 'filename': filename, 'full': path, 'sha256': get_sha256(path) if not directory else '', 'mimetype': mime or 'application/octet-stream', 'syntax': self.get_syntax(filename), 'modified': os.stat(path)[stat.ST_MTIME], 'short': short, 'size': os.stat(path)[stat.ST_SIZE], 'truncated': self.truncate(filename), 'url': reverse('files.list', args=[self.file.id, 'file', short]), 'url_serve': reverse('files.redirect', args=[self.file.id, short]), 'version': self.file.version.version, } return result
def generate_hash(self, filename=None):
    """Generate a hash for a file."""
    with open(filename or self.current_file_path, 'rb') as fobj:
        return 'sha256:{}'.format(get_sha256(fobj))

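# `get_sha256()` itself is not included here. Most snippets pass it an open
# binary file object (or an io.BytesIO wrapping a git blob), so a chunked
# implementation along these lines is assumed; the older `_get_files()`
# versions call it with a plain path instead, which suggests the helper's
# signature changed between revisions.
import hashlib
import io


def get_sha256(file_obj, block_size=io.DEFAULT_BUFFER_SIZE):
    """Return the hex sha256 digest of an open binary file object."""
    hash_ = hashlib.sha256()
    # Hash in fixed-size chunks so large files never have to fit in memory.
    for chunk in iter(lambda: file_obj.read(block_size), b''):
        hash_.update(chunk)
    return hash_.hexdigest()
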
def _calculate_hash():
    if blob is None:
        return None
    return get_sha256(io.BytesIO(memoryview(blob)))

def repack_fileupload(results, upload_pk):
    log.info('Starting task to repackage FileUpload %s', upload_pk)
    upload = FileUpload.objects.get(pk=upload_pk)
    # When a FileUpload is created and a file added to it, if it's a xpi/zip,
    # it should be moved to upload.path, and it should have a .zip extension,
    # so we only need to care about that extension here.
    # We don't trust upload.name: it's the original filename as used by the
    # developer, so it could be something else.
    if upload.path.endswith('.zip'):
        timer = StopWatch('files.tasks.repack_fileupload.')
        timer.start()
        # tempdir must *not* be on TMP_PATH, we want local fs instead. It will
        # be deleted automatically once we exit the context manager.
        with tempfile.TemporaryDirectory(
                prefix='repack_fileupload_extract') as tempdir:
            try:
                extract_zip(upload.path, tempdir=tempdir)

                if waffle.switch_is_active('enable-manifest-normalization'):
                    manifest = Path(tempdir) / 'manifest.json'

                    if manifest.exists():
                        try:
                            xpi_data = parse_xpi(upload.path, minimal=True)

                            if not xpi_data.get(
                                    'is_mozilla_signed_extension', False):
                                json_data = ManifestJSONExtractor(
                                    manifest.read_bytes()).data
                                manifest.write_text(
                                    json.dumps(json_data, indent=2))
                        except Exception:
                            # If we cannot normalize the manifest file, we
                            # skip this step and let the linter catch the
                            # exact cause in order to return a more
                            # appropriate error than "unexpected error",
                            # which would happen if this task was handling
                            # the error itself.
                            pass
            except Exception as exc:
                # Something bad happened, maybe we couldn't parse the zip
                # file. @validation_task should ensure the exception is caught
                # and transformed into a generic error message for the
                # developer, so we just log it and re-raise.
                log.exception('Could not extract upload %s for repack.',
                              upload_pk, exc_info=exc)
                raise
            timer.log_interval('1.extracted')
            log.info('Zip from upload %s extracted, repackaging', upload_pk)
            # We'll move the file to its final location below with
            # move_stored_file(), so don't let tempfile delete it.
            file_ = tempfile.NamedTemporaryFile(
                dir=settings.TMP_PATH, suffix='.zip', delete=False)
            shutil.make_archive(
                os.path.splitext(file_.name)[0], 'zip', tempdir)
            with open(file_.name, 'rb') as f:
                upload.hash = 'sha256:%s' % get_sha256(f)
        timer.log_interval('2.repackaged')
        log.info('Zip from upload %s repackaged, moving file back', upload_pk)
        storage.move_stored_file(file_.name, upload.path)
        timer.log_interval('3.moved')
        upload.save()
        timer.log_interval('4.end')
    else:
        log.info('Not repackaging upload %s, it is not a zip file.', upload_pk)
    return results