def file_list(self, element, uid):
    cache_filename = 'filelist_%s' % element.sha256
    key = storage(settings.STORAGE_CACHE).uid_to_key(uid)
    fileobj = storage(settings.STORAGE_CACHE).get_file(key, cache_filename)
    if fileobj is None:
        tmpfile = tempfile.NamedTemporaryFile(dir=settings.FILE_UPLOAD_TEMP_DIR)
        archive_file = storage(settings.STORAGE_ARCHIVE).get_file(element.archive_key, sub_path='')
        ar_file = ArFile(element.filename, mode='r', fileobj=archive_file)
        data_file, data_file_name = self.get_subfile(ar_file, 'data.tar.')
        mode = 'r:*'
        if data_file_name.endswith('.xz') or data_file_name.endswith('.lzma'):
            data_file_content = data_file.read()
            data_file_content_uncompressed = lzma.decompress(data_file_content)
            data_file.close()
            data_file = io.BytesIO(data_file_content_uncompressed)
            mode = 'r'
        tar_file = tarfile.open(name='data', mode=mode, fileobj=data_file)
        members = tar_file.getmembers()
        members = filter(lambda x: x.isfile(), members)
        names = [x.path[2:] for x in members]
        tar_file.close()
        ar_file.close()
        archive_file.close()
        for name in names:
            tmpfile.write(('%s\n' % name).encode('utf-8'))
        tmpfile.flush()
        tmpfile.seek(0)
        storage(settings.STORAGE_CACHE).store_descriptor(uid, cache_filename, tmpfile)
        tmpfile.close()
    else:
        names = [line.strip().decode() for line in fileobj]
        fileobj.close()
    return names
def compress_files(open_files: dict, root: str, uid: str) -> list:
    """
    Return a list of tuples (os.path.relpath(filename, root), md5, sha1, sha256, actual_size).
    Also stores the generated files (and the original ones).

    :param open_files: dict[filename] = open file descriptor in mode w+b
    :param root:
    :param uid:
    :return:
    """
    hash_controls = []
    for filename, package_file in open_files.items():
        package_file.seek(0)
        gz_filename = filename + '.gz'
        bz2_filename = filename + '.bz2'
        xz_filename = filename + '.xz'
        gz_file = tempfile.TemporaryFile(mode='w+b', dir=settings.FILE_UPLOAD_TEMP_DIR)
        bz2_file = tempfile.TemporaryFile(mode='w+b', dir=settings.FILE_UPLOAD_TEMP_DIR)
        xz_file = tempfile.TemporaryFile(mode='w+b', dir=settings.FILE_UPLOAD_TEMP_DIR)
        bz2_compressor = bz2.BZ2Compressor(9)
        # only build the .xz variant when the lzma module is available: reusing the bz2
        # compressor for both outputs would interleave its output between the two files
        xz_compressor = lzma.LZMACompressor() if lzma is not None else None
        with gzip.GzipFile(gz_filename, mode='wb', compresslevel=9, fileobj=gz_file) as fd_gz:
            data = package_file.read(10240)
            while data:
                fd_gz.write(data)
                bz2_file.write(bz2_compressor.compress(data))
                if xz_compressor is not None:
                    xz_file.write(xz_compressor.compress(data))
                data = package_file.read(10240)
        bz2_file.write(bz2_compressor.flush())
        if xz_compressor is not None:
            xz_file.write(xz_compressor.flush())
        all_files = [(package_file, filename), (gz_file, gz_filename), (bz2_file, bz2_filename), ]
        if lzma is not None:
            all_files.append((xz_file, xz_filename))
        for obj, filename_ in all_files:
            obj.flush()
            obj.seek(0)
            md5, sha1, sha256, size = hashlib.md5(), hashlib.sha1(), hashlib.sha256(), 0
            data = obj.read(32768)
            while data:
                md5.update(data)
                sha1.update(data)
                sha256.update(data)
                size += len(data)
                data = obj.read(32768)
            hash_controls.append((os.path.relpath(filename_, root), md5.hexdigest(), sha1.hexdigest(),
                                  sha256.hexdigest(), size))
            obj.seek(0)
            storage(settings.STORAGE_CACHE).store_descriptor(uid, filename_, obj)
            obj.close()
    return hash_controls
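# Illustrative usage sketch for compress_files() above; the repository name, the 'Packages'
# entry and the hash values are hypothetical, only the call shape comes from the function
# itself (generate_indexes() below invokes it as self.compress_files(open_files, root, uid)):
#
#   open_files = {'dists/myrepo/stable/binary-amd64/Packages': <TemporaryFile opened in 'w+b'>}
#   hashes = self.compress_files(open_files, 'dists/myrepo/', uid)
#   # hashes == [('stable/binary-amd64/Packages',     '<md5>', '<sha1>', '<sha256>', 1234),
#   #            ('stable/binary-amd64/Packages.gz',  '<md5>', '<sha1>', '<sha256>', 456),
#   #            ('stable/binary-amd64/Packages.bz2', '<md5>', '<sha1>', '<sha256>', 470),
#   #            ('stable/binary-amd64/Packages.xz',  '<md5>', '<sha1>', '<sha256>', 440)]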
def __write_file(self, repo, dest_filename, write_function):
    uid = self.storage_uid % repo.pk
    key = storage(settings.STORAGE_CACHE).uid_to_key(uid)
    plain_file = tempfile.NamedTemporaryFile(mode='w+b', dir=settings.TEMP_ROOT, delete=False)
    gz_plain_file = tempfile.NamedTemporaryFile(mode='w+b', dir=settings.TEMP_ROOT, delete=False)
    gz_file = gzip.open(gz_plain_file, 'wb')
    write_function(plain_file)
    plain_file.flush()
    plain_file.seek(0)
    for block in iter(lambda: plain_file.read(8192), b''):
        gz_file.write(block)
    gz_file.close()
    gz_plain_file.close()
    storage(settings.STORAGE_CACHE).import_filename(plain_file.name, key, dest_filename)
    storage(settings.STORAGE_CACHE).import_filename(gz_plain_file.name, key, dest_filename + '.gz')
def index_file(self, request, rid, filename, mimetype):
    repo = get_object_or_404(Repository.reader_queryset(request), id=rid, archive_type=self.archive_type)
    uid = self.storage_uid % repo.id
    key = storage(settings.STORAGE_CACHE).uid_to_key(uid)
    return sendpath(settings.STORAGE_CACHE, key, filename, mimetype)
def update_element(self, element):
    """
    Extract some information from the element to prepare the repository.

    :param element: Element to add to the repository
    :return: Unicode string containing meta-data
    """
    archive_file = storage(settings.STORAGE_ARCHIVE).get_file(element.archive_key)
    py_archive = self.open_file(element.filename, archive_file)
    if py_archive is None:
        raise InvalidRepositoryException(_('Unable to open file'))
    try:
        control_data_value = py_archive.get_pkg_info()
        if not control_data_value:
            raise InvalidRepositoryException(_('No control data in archive'))
        element.extra_data = control_data_value
        control_data = parse_control_data(control_data_value, continue_line=' ', skip_after_blank=True)
        for key, attr in (('Name', 'archive'), ('Version', 'version'),
                          ('Home-page', 'official_link'), ('Description', 'long_description')):
            if key in control_data:
                setattr(element, attr, control_data.get(key, ''))
        element.archive = element.archive.replace('-', '').replace('_', '')
        element.name = element.archive
    finally:
        py_archive.close()
        archive_file.close()
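# For reference, a minimal PKG-INFO payload as returned by get_pkg_info() above and parsed
# by parse_control_data(); the values are hypothetical:
#
#   Metadata-Version: 1.1
#   Name: example-package
#   Version: 1.2.3
#   Home-page: https://example.com
#   Description: first line of the description
#    continuation lines are indented by a single space
#
# 'Name' fills element.archive (with '-' and '_' stripped), 'Version' fills element.version,
# 'Home-page' the official link and 'Description' the long description.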
def update_element(self, element):
    """
    Extract some information from the element to prepare the repository.

    :param element: Element to add to the repository
    :return: Unicode string containing meta-data
    """
    archive_file = storage(settings.STORAGE_ARCHIVE).get_file(element.archive_key, '')
    gem_fd = tarfile.open(fileobj=archive_file, mode='r')
    metadata_fd = gem_fd.extractfile('metadata.gz')
    metadata_gz_content = metadata_fd.read()
    metadata_bytes = gzip.decompress(metadata_gz_content)
    gem_fd.close()
    data = yaml.load(io.BytesIO(metadata_bytes), Loader=RubyLoader)
    for key, attr in (('name', 'archive'), ('homepage', 'official_link'),
                      ('summary', 'long_description'), ('name', 'name')):
        if key in data.values:
            setattr(element, attr, data.values[key])
    element.version = data.values['version'].version
    p = subprocess.Popen(['ruby', '-e', 'puts Marshal.dump(Gem::Specification.from_yaml(ARGF.read))'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, stderr = p.communicate(metadata_bytes)
    extra_data = {'yaml': metadata_bytes.decode('utf-8'),
                  'marshal': base64.b64encode(stdout).decode('utf-8')}
    element.extra_data = json.dumps(extra_data)
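# For reference, metadata.gz inside a .gem archive contains a YAML-serialized
# Gem::Specification; a trimmed, hypothetical example of the fields read above:
#
#   --- !ruby/object:Gem::Specification
#   name: example-gem
#   version: !ruby/object:Gem::Version
#     version: 1.0.0
#   summary: short description of the gem
#   homepage: https://example.com
#
# The ruby subprocess re-serializes this specification with Marshal, so both the original
# YAML and a base64-encoded Marshal dump end up in element.extra_data.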
def update_element(self, element):
    """
    Extract some information from the element to prepare the repository.
    Equivalent to:
        ar -x control.tar.gz
        tar -xf control.tar.gz control

    :param element: Element to add to the repository
    :return: Unicode string containing meta-data
    """
    archive_file = storage(settings.STORAGE_ARCHIVE).get_file(element.archive_key)
    ar_file = ArFile(element.filename, mode='r', fileobj=archive_file)
    control_file, control_file_name = self.get_subfile(ar_file, 'control.tar.')
    if control_file is None:
        raise InvalidRepositoryException('No control file found in .deb package')
    mode = 'r:*'
    if control_file_name.endswith('.xz') or control_file_name.endswith('.lzma'):
        control_file_content = control_file.read()
        control_file_content_uncompressed = lzma.decompress(control_file_content)
        control_file.close()
        control_file = io.BytesIO(control_file_content_uncompressed)
        mode = 'r'
    tar_file = tarfile.open(name='control', mode=mode, fileobj=control_file)
    control_data = tar_file.extractfile('./control')
    # populate the element with the fields of the control file
    control_data_value = control_data.read().decode('utf-8')
    control_data.close()
    tar_file.close()
    ar_file.close()
    archive_file.close()
    element.extra_data = control_data_value
    parsed_data = parse_control_data(control_data_value)
    element.archive = parsed_data['Package']
    element.version = parsed_data['Version']
    element.official_link = parsed_data.get('Homepage', '')
    element.long_description = parsed_data.get('Description', '')
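# For reference, a minimal Debian control file as extracted above (hypothetical values);
# parse_control_data() turns it into a field -> value mapping:
#
#   Package: example
#   Version: 1.0-1
#   Architecture: amd64
#   Maintainer: Jane Doe <jane@example.com>
#   Homepage: https://example.com
#   Description: one-line synopsis
#    longer description, continuation lines start with a space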
def update_element(self, element):
    """
    Extract some information from the element to prepare the repository.

    :param element: Element to add to the repository
    :return: Unicode string containing meta-data
    """
    if element.archive:
        element.name = element.archive.rpartition('.')[2]
    # the MANIFEST.MF parsing below is currently disabled by the `and False`
    if element.filename.endswith('.jar') and False:
        archive_file = storage(settings.STORAGE_ARCHIVE).get_file(element.archive_key)
        compressed_file = zipfile.ZipFile(archive_file)
        prefix = os.path.commonprefix(compressed_file.namelist())
        control_data_file = compressed_file.open(os.path.join(prefix, 'META-INF', 'MANIFEST.MF'))
        control_data_value = control_data_file.read().decode('utf-8')
        control_data_file.close()
        compressed_file.close()
        archive_file.close()
        element.extra_data = control_data_value
        control_data = parse_control_data(control_data_value, continue_line=' ')
        for key, attr in (('Bundle-SymbolicName', 'name'), ('Bundle-Version', 'version'),
                          ('Implementation-Title', 'archive'), ('Implementation-Version', 'version'),
                          ('Name', 'name'),):
            if key in control_data:
                # archive: PackageName, name: Organization Name
                setattr(element, attr, control_data.get(key, ''))
def update_element(self, element):
    fd = storage(settings.STORAGE_ARCHIVE).get_file(element.archive_key, sub_path='')
    rpm_obj = rpm.RPM(fd)
    element.filename = rpm_obj.canonical_filename
    element.version = rpm_obj.header.version
    element.archive = rpm_obj.header.name
    header = {}
    signature = {}
    for (obj_dict, header_base) in ((header, rpm_obj.header), (signature, rpm_obj.signature)):
        available = {}
        for entry in header_base:
            available[entry.tag] = entry.value
        for attr_name, infos in header_base.TAGS.items():
            attr_value = available.get(infos[0], infos[1])
            if not isinstance(attr_value, bytes):
                obj_dict[attr_name] = attr_value
    rpm_ = {'binary': rpm_obj.binary,
            'canonical_filename': rpm_obj.canonical_filename,
            'checksum': rpm_obj.checksum,
            'filesize': rpm_obj.filesize,
            'source': rpm_obj.source,
            'filelist': [{'type': x.type, 'name': x.name, } for x in rpm_obj.filelist],
            'provides': [{'name': x.name, 'str_flags': x.str_flags, 'version': list(x.version)}
                         for x in rpm_obj.provides],
            'requires': [{'name': x.name, 'str_flags': x.str_flags, 'version': list(x.version)}
                         for x in rpm_obj.requires],
            'changelog': [{'name': x.name, 'time': x.time, 'text': x.text, } for x in rpm_obj.changelog],
            'obsoletes': [{'name': x.name, 'str_flags': x.str_flags, 'version': list(x.version)}
                          for x in rpm_obj.obsoletes],
            'conflicts': [{'name': x.name, 'str_flags': x.str_flags, 'version': list(x.version)}
                          for x in rpm_obj.conflicts],
            'header_range': list(rpm_obj.header.header_range), }
    rpm_dict = {'header': header, 'signature': signature, 'rpm': rpm_, }
    element.extra_data = json.dumps(rpm_dict)
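# Rough shape of the JSON stored in element.extra_data above (all values hypothetical):
#
#   {"header": {"name": "example", "version": "1.0", "architecture": "x86_64", ...},
#    "signature": {...},
#    "rpm": {"binary": true,
#            "canonical_filename": "example-1.0-1.x86_64.rpm",
#            "filelist": [{"type": "file", "name": "/usr/bin/example"}],
#            "provides": [{"name": "example", "str_flags": "EQ", "version": ["0", "1.0", "1"]}],
#            ...}}
#
# The yum generate_indexes() below re-reads this JSON to build primary.xml, filelists.xml
# and other.xml without reopening the original .rpm archive.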
def specs(self, request, rid, repo_slug, state_slug=None, filename='specs.4.8.gz'):
    # noinspection PyUnusedLocal
    repo_slug = repo_slug
    repo = get_object_or_404(Repository.reader_queryset(request), id=rid, archive_type=self.archive_type)
    if state_slug:
        filename = 'specs/%(slug)s/%(filename)s' % {'slug': state_slug, 'filename': filename, }
    else:
        filename = 'specs/%(filename)s' % {'filename': filename, }
    uid = self.storage_uid % repo.pk
    key = storage(settings.STORAGE_CACHE).uid_to_key(uid)
    return sendpath(settings.STORAGE_CACHE, key, filename, 'application/gzip')
def repodata_file(self, request, rid, repo_slug, state_slug, arch, filename, compression):
    if filename not in ('comps.xml', 'primary.xml', 'other.xml', 'filelists.xml', 'repomd.xml', ):
        return HttpResponse(_('File not found'), status=404)
    if compression and filename == 'repomd.xml':
        return HttpResponse(_('File not found'), status=404)
    # noinspection PyUnusedLocal
    repo_slug = repo_slug
    filename = self.index_filename(state_slug, arch, filename + compression)
    mimetype = 'text/xml'
    repo = get_object_or_404(Repository.reader_queryset(request), id=rid, archive_type=self.archive_type)
    uid = self.storage_uid % repo.id
    key = storage(settings.STORAGE_CACHE).uid_to_key(uid)
    return sendpath(settings.STORAGE_CACHE, key, filename, mimetype)
def sendpath(storage_type, key, path, mimetype):
    storage_obj = storage(storage_type)
    filesize = storage_obj.get_size(key, path)
    full_path = storage_obj.get_path(key, path)
    if full_path:
        return send_file(full_path)
    fileobj = storage_obj.get_file(key, path)
    if fileobj is None:
        raise Http404
    response = StreamingHttpResponse(read_file_in_chunks(fileobj), content_type=mimetype)
    if mimetype[0:4] != 'text' and mimetype[0:5] != 'image':
        response['Content-Disposition'] = 'attachment; filename={0}'.format(os.path.basename(path))
    response['Content-Length'] = filesize
    return response
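# read_file_in_chunks() is imported from elsewhere in the project; a minimal sketch of the
# generator that sendpath() relies on could look like this (the chunk size is an assumption):
def _read_file_in_chunks_sketch(fileobj, chunk_size=32768):
    """Yield successive chunks of fileobj until EOF, suitable for StreamingHttpResponse."""
    while True:
        data = fileobj.read(chunk_size)
        if not data:
            break
        yield data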
def update_element(self, element):
    """
    Extract some information from the element to prepare the repository.

    :param element: Element to add to the repository
    :return: Unicode string containing meta-data
    """
    if element.archive:
        element.name = element.archive.rpartition('.')[2]
    archive_file = storage(settings.STORAGE_ARCHIVE).get_file(element.archive_key)
    compressed_file = tarfile.open(name=None, fileobj=archive_file, mode='r')
    all_names = {x for x in compressed_file.getnames()}
    provider = 'virtualbox'
    if 'metadata.json' in all_names:
        metadata_file = compressed_file.extractfile('metadata.json')
        metadata = json.loads(metadata_file.read().decode('utf-8'))
        provider = metadata['provider']
    element.extra_data = json.dumps({'provider': provider})
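# For reference, the metadata.json shipped inside a Vagrant .box archive typically looks
# like the following (hypothetical provider value); only the 'provider' key is used above:
#
#   {"provider": "virtualbox"}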
def get_file(request: HttpRequest, eid: int, compression: str=None, path: str='',
             element: Element=None, name: str=None):
    """
    Send a file to the client as an HttpResponse.

    Three cases are handled:
      * case 1) path != '' => send a single file taken from the uncompressed copy of the archive
      * case 2) compression is not None => recompress the whole archive to the requested format
      * case 3) otherwise => send the original file

    :param request:
    :param eid:
    :param compression:
    :param path:
    :param element: avoid an extra DB query when the element has already been fetched
    :param name:
    :return:
    """
    # noinspection PyUnusedLocal
    name = name
    if element is None:
        element = get_object_or_404(Element.reader_queryset(request).select_related(), id=eid)
    arc_storage, arc_key, arc_path = None, None, None
    mimetype = 'application/octet-stream'
    if element.uncompressed_key and path:  # case 1
        path = os.path.normpath(path)
        if path.startswith('../'):
            raise Http404
    elif element.uncompressed_key and compression is not None:  # case 2
        arc_storage, arc_key, arc_path = storage(settings.STORAGE_UNCOMPRESSED), element.uncompressed_key, path
    elif element.archive_key:  # case 2 or 3
        if compression is not None:  # case 2
            arc_storage, arc_key, arc_path = storage(settings.STORAGE_ARCHIVE), element.archive_key, ''
    else:
        raise Http404
    if arc_storage is not None:
        temp_file = tempfile.TemporaryFile(mode='w+b', dir=settings.TEMP_ROOT)
        comp_file = None
        ext = ''
        if compression == 'zip':
            mimetype = 'application/zip'
            ext = '.zip'
            comp_file = zipfile.ZipFile(temp_file, 'w', zipfile.ZIP_DEFLATED)
        elif compression == 'tgz':
            mimetype = 'application/x-tar'
            ext = '.tgz'
            comp_file = tarfile.open(None, 'w:gz', fileobj=temp_file)
        elif compression == 'tbz':
            mimetype = 'application/x-tar'
            ext = '.tbz'
            comp_file = tarfile.open(None, 'w:bz2', fileobj=temp_file)
        reldir = None
        for root, dirs, files in arc_storage.walk(arc_key, arc_path):
            if reldir is None:
                reldir = root
            for name in files:
                fullname = os.path.join(root, name)
                fileobj = arc_storage.get_file(arc_key, fullname)
                arcname = os.path.relpath(fullname, reldir)
                if isinstance(comp_file, zipfile.ZipFile):
                    # ZipFile has no addfile(): write the content directly
                    comp_file.writestr(arcname, fileobj.read())
                else:
                    tarinfo = tarfile.TarInfo(arcname)
                    tarinfo.size = arc_storage.get_size(arc_key, fullname)
                    comp_file.addfile(tarinfo, fileobj)
        comp_file.close()
        temp_file.seek(0)
        fileobj = temp_file
        filename = os.path.basename(element.filename) + ext
    elif path:
        mimetype = mimetypes.guess_type(path)[0]
        if mimetype is None:
            mimetype = 'application/octet-stream'
        return sendpath(settings.STORAGE_UNCOMPRESSED, element.uncompressed_key, path, mimetype)
    else:
        return sendpath(settings.STORAGE_ARCHIVE, element.archive_key, '', element.mimetype)
    response = StreamingHttpResponse(read_file_in_chunks(fileobj), content_type=mimetype)
    if mimetype[0:4] != 'text' and mimetype[0:5] != 'image':
        response['Content-Disposition'] = 'attachment; filename={0}'.format(filename)
    return response
def generate_indexes(self, repository, states=None, validity=365):
    default_architectures = {'amd64', }
    uid = self.storage_uid % repository.id
    repo_slug = repository.slug
    root_url = reverse('repository:%s:index' % self.archive_type, kwargs={'rid': repository.id, })
    if repository.is_private:
        root_url = 'authb-%s' % root_url
    if states is None:
        states = list(ArchiveState.objects.filter(repository=repository).order_by('name'))
    states = [state for state in states
              if Element.objects.filter(repository=repository, states=state).count() > 0]
    all_states_architectures = set()
    all_states = set()
    open_files = {}
    complete_file_list = {}
    root = 'dists/%(repo)s/' % {'repo': repo_slug}
    # list all available architectures (required to add architecture-independent packages to all archs)
    for element in Element.objects.filter(repository=repository):
        control_data = parse_control_data(element.extra_data)
        architecture = control_data.get('Architecture', 'all')
        all_states_architectures.add(architecture)
    # build the following files:
    #   * dists/(group)/(state)/binary-(architecture)/Packages
    #   * dists/(group)/(state)/binary-(architecture)/Release
    # prepare data for:
    #   * dists/(group)/Contents-(architecture)
    if not all_states_architectures or all_states_architectures == {'all'}:
        all_states_architectures = default_architectures
    for state in states:
        state_architectures = set()
        all_states.add(state.name)
        for element in Element.objects.filter(repository=repository, states=state).order_by('filename'):
            control_data = parse_control_data(element.extra_data)
            architecture = control_data.get('Architecture', 'all')
            section = control_data.get('Section', 'contrib')
            package_file_list = ["%- 100s%s\n" % (x, section) for x in self.file_list(element, uid)]
            if architecture == 'all':
                elt_architectures = default_architectures
            else:
                elt_architectures = {architecture, }
            state_architectures |= elt_architectures
            for architecture in elt_architectures:
                complete_file_list.setdefault(architecture, [])
                complete_file_list[architecture] += package_file_list
                filename = 'dists/%(repo)s/%(state)s/binary-%(architecture)s/Packages' % \
                    {'repo': repo_slug, 'state': state.name, 'architecture': architecture, }
                if filename not in open_files:
                    open_files[filename] = tempfile.TemporaryFile(mode='w+b', dir=settings.FILE_UPLOAD_TEMP_DIR)
                package_file = open_files[filename]
                package_file.write(element.extra_data.encode('utf-8'))
                for key, attr in (('MD5sum', 'md5'), ('SHA1', 'sha1'), ('SHA256', 'sha256'), ('Size', 'filesize')):
                    if key not in control_data:
                        package_file.write("{0}: {1}\n".format(key, getattr(element, attr)).encode('utf-8'))
                package_url = reverse('repository:%s:get_file' % self.archive_type,
                                      kwargs={'rid': repository.id, 'repo_slug': repo_slug,
                                              'filename': element.filename, 'state_slug': state.slug,
                                              'folder': element.filename[0:1], })
                package_url = os.path.relpath(package_url, root_url)
                package_file.write("Filename: {0}\n".format(package_url).encode('utf-8'))
                package_file.write("\n".encode('utf-8'))
        if len(state_architectures) == 0:
            state_architectures = default_architectures
        # we process elements
        for architecture in state_architectures:
            filename = 'dists/%(repo)s/%(state)s/binary-%(architecture)s/Release' % \
                {'repo': repo_slug, 'state': state.slug, 'architecture': architecture, }
            open_files[filename] = tempfile.TemporaryFile(mode='w+b', dir=settings.FILE_UPLOAD_TEMP_DIR)
            content = render_to_string('repositories/aptitude/architecture_release.txt',
                                       {'architecture': architecture, 'repository': repository, 'state': state, })
            open_files[filename].write(content.encode('utf-8'))
    # build the following files:
    #   * dists/(group)/Contents-(architecture)
    for architecture, file_list in complete_file_list.items():
        file_list.sort()
        filename = 'dists/%(repo)s/Contents-%(architecture)s' % {'repo': repo_slug, 'architecture': architecture, }
        open_files[filename] = tempfile.TemporaryFile(mode='w+b', dir=settings.FILE_UPLOAD_TEMP_DIR)
        for info in file_list:
            open_files[filename].write(info.encode('utf-8'))
    # build the following files:
    #   * dists/(group)/Contents-(architecture).gz/.bz2/.xz
    #   * dists/(group)/(state)/binary-(architecture)/Packages.gz/.bz2/.xz
    #   * dists/(group)/(state)/binary-(architecture)/Release.gz/.bz2/.xz
    # store all files in the cache
    hash_controls = self.compress_files(open_files, root, uid)
    # build dists/(group)/Release and store it in the cache
    release_file = tempfile.TemporaryFile(mode='w+b', dir=settings.FILE_UPLOAD_TEMP_DIR)
    now = datetime.datetime.now(utc)
    now_str = now.strftime('%a, %d %b %Y %H:%M:%S UTC')  # e.g. 'Mon, 29 Nov 2010 08:12:51 UTC'
    until = (now + datetime.timedelta(validity)).strftime('%a, %d %b %Y %H:%M:%S UTC')
    content = render_to_string('repositories/aptitude/state_release.txt',
                               {'architectures': all_states_architectures, 'until': until,
                                'states': all_states, 'repository': repository, 'date': now_str})
    release_file.write(content.encode('utf-8'))
    for hash_value, index in (('MD5Sum', 1), ('SHA1', 2), ('SHA256', 3)):
        release_file.write("{0}:\n".format(hash_value).encode('utf-8'))
        for line in hash_controls:
            release_file.write((" %s % 8d %s\n" % (line[index], line[4], line[0])).encode('utf-8'))
    release_file.flush()
    release_file.seek(0)
    filename = 'dists/%(repo)s/Release' % {'repo': repo_slug, }
    storage(settings.STORAGE_CACHE).store_descriptor(uid, filename, release_file)
    # build dists/(group)/Release.gpg and dists/(group)/InRelease, and store them in the cache
    release_file.seek(0)
    signature_content = GPGSigner().sign_file(release_file, detach=True)
    release_file.seek(0)
    inrelease_content = GPGSigner().sign_file(release_file, detach=False)
    release_file.close()
    signature_file = tempfile.TemporaryFile(mode='w+b', dir=settings.FILE_UPLOAD_TEMP_DIR)
    signature_file.write(signature_content.encode('utf-8'))
    signature_file.flush()
    signature_file.seek(0)
    filename = 'dists/%(repo)s/Release.gpg' % {'repo': repo_slug, }
    storage(settings.STORAGE_CACHE).store_descriptor(uid, filename, signature_file)
    signature_file.close()
    inrelease_file = tempfile.TemporaryFile(mode='w+b', dir=settings.FILE_UPLOAD_TEMP_DIR)
    inrelease_file.write(inrelease_content.encode('utf-8'))
    inrelease_file.flush()
    inrelease_file.seek(0)
    filename = 'dists/%(repo)s/InRelease' % {'repo': repo_slug, }
    storage(settings.STORAGE_CACHE).store_descriptor(uid, filename, inrelease_file)
    inrelease_file.close()
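# For reference, the checksum section appended to dists/(group)/Release above follows the
# usual Debian layout (hash, size, relative path), one block per hash algorithm; the values
# below are hypothetical:
#
#   MD5Sum:
#    0123456789abcdef0123456789abcdef     1234 stable/binary-amd64/Packages
#   SHA1:
#    ...
#   SHA256:
#    ...
#
# Each line is built from one tuple returned by compress_files(): (relative path, md5,
# sha1, sha256, size).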
def generate_indexes(self, repository, states=None, validity=365):
    if states is None:
        states = list(ArchiveState.objects.filter(repository=repository).order_by('name'))
    revision = int(time.time())
    architectures_by_state = {x.slug: set() for x in states}
    # architectures_by_state[archive_state.slug] = {'x86_64', 'c7', }
    # load all dict infos and count all architectures
    rpm_objects = []
    package_count_by_state_arch = {x.slug: {'noarch': 0} for x in states}
    for element in Element.objects.filter(repository=repository).prefetch_related('states'):
        rpm_dict = json.loads(element.extra_data)
        rpm_objects.append(rpm_dict)
        rpm_dict['states'] = [s.slug for s in element.states.all()]
        package_architecture = rpm_dict['header']['architecture'] or 'noarch'
        if package_architecture != 'noarch':
            for state_slug in rpm_dict['states']:
                architectures_by_state[state_slug].add(package_architecture)
        for state_slug in rpm_dict['states']:
            package_count_by_state_arch[state_slug].setdefault(package_architecture, 0)
            package_count_by_state_arch[state_slug][package_architecture] += 1
    # add the count of 'noarch' packages to the other architectures
    for state_slug, package_count_by_arch in package_count_by_state_arch.items():
        if len(package_count_by_arch) == 1:  # only the 'noarch' architecture
            package_count_by_arch['x86_64'] = 0
            architectures_by_state[state_slug] = {'x86_64', }
        noarch_count = package_count_by_arch['noarch']
        del package_count_by_arch['noarch']
        for architecture in package_count_by_arch:
            package_count_by_arch[architecture] += noarch_count
    # prepare all files
    open_files = {}
    for state_slug, architectures in architectures_by_state.items():
        for architecture in architectures:
            def write(name_, data):
                filename_ = self.index_filename(state_slug, architecture, name_)
                open_files[filename_].write(data.encode('utf-8'))

            for name in ('other.xml', 'filelists.xml', 'comps.xml', 'primary.xml', ):
                filename = self.index_filename(state_slug, architecture, name)
                open_files[filename] = tempfile.TemporaryFile(mode='w+b', dir=settings.TEMP_ROOT)
                write(name, '<?xml version="1.0" encoding="UTF-8"?>\n')
            package_count = package_count_by_state_arch[state_slug][architecture]
            write('other.xml', '<otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%d">\n'
                  % package_count)
            write('filelists.xml', '<filelists xmlns="http://linux.duke.edu/metadata/filelists" packages="%d">\n'
                  % package_count)
            write('comps.xml', '<!DOCTYPE comps PUBLIC "-//CentOS//DTD Comps info//EN" "comps.dtd">\n')
            write('comps.xml', '<comps>\n')
            write('primary.xml', '<metadata xmlns="http://linux.duke.edu/metadata/common" '
                  'xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%d">\n' % package_count)
    # fill all files with RPMs
    for rpm_dict in rpm_objects:
        filelists = render_to_string('repositories/yum/filelists.xml', rpm_dict)
        primary = render_to_string('repositories/yum/primary.xml', rpm_dict)
        other = render_to_string('repositories/yum/other.xml', rpm_dict)
        for state_slug in rpm_dict['states']:
            architectures = {rpm_dict['header']['architecture'], }
            if architectures == {'noarch', }:
                architectures = architectures_by_state[state_slug]
            for architecture in architectures:
                open_files[self.index_filename(state_slug, architecture, 'filelists.xml')].write(
                    filelists.encode('utf-8'))
                open_files[self.index_filename(state_slug, architecture, 'primary.xml')].write(
                    primary.encode('utf-8'))
                open_files[self.index_filename(state_slug, architecture, 'other.xml')].write(
                    other.encode('utf-8'))
    # finish all files
    for state_slug, architectures in architectures_by_state.items():
        for architecture in architectures:
            open_files[self.index_filename(state_slug, architecture, 'other.xml')].write(b'</otherdata>')
            open_files[self.index_filename(state_slug, architecture, 'filelists.xml')].write(b'</filelists>')
            open_files[self.index_filename(state_slug, architecture, 'comps.xml')].write(b'</comps>')
            open_files[self.index_filename(state_slug, architecture, 'primary.xml')].write(b'</metadata>')
    storage_uid = self.storage_uid % repository.id
    # generate a compressed version of each file
    list_of_hashes = self.compress_files(open_files, '', storage_uid)
    dict_of_hashes = {x[0]: x for x in list_of_hashes}
    for state_slug, architectures in architectures_by_state.items():
        for architecture in architectures:
            filename = self.index_filename(state_slug, architecture, 'repomd.xml')
            open_files[filename] = tempfile.TemporaryFile(mode='w+b', dir=settings.TEMP_ROOT)
            other = self.index_filename(state_slug, architecture, 'other.xml')
            filelists = self.index_filename(state_slug, architecture, 'filelists.xml')
            comps = self.index_filename(state_slug, architecture, 'comps.xml')
            primary = self.index_filename(state_slug, architecture, 'primary.xml')
            template_values = {'revision': revision,
                               'other': dict_of_hashes[other],
                               'filelists': dict_of_hashes[filelists],
                               'comps': dict_of_hashes[comps],
                               'primary': dict_of_hashes[primary],
                               'other_gz': dict_of_hashes[other + '.gz'],
                               'filelists_gz': dict_of_hashes[filelists + '.gz'],
                               'comps_gz': dict_of_hashes[comps + '.gz'],
                               'primary_gz': dict_of_hashes[primary + '.gz'], }
            repomd = render_to_string('repositories/yum/repomd.xml', template_values)
            repomd_file = open_files[filename]
            repomd_file.write(repomd.encode('utf-8'))
            repomd_file.flush()
            repomd_file.seek(0)
            storage(settings.STORAGE_CACHE).store_descriptor(storage_uid, filename, repomd_file)
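# Each value passed to repositories/yum/repomd.xml above is a compress_files() tuple
# (relative path, md5, sha1, sha256, size); the rendered repomd.xml is expected to expose
# roughly one <data> entry per index, along the lines of (hypothetical values):
#
#   <data type="primary">
#     <checksum type="sha256">...</checksum>
#     <location href="repodata/primary.xml.gz"/>
#     <size>1234</size>
#   </data>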