def _write_gzip_header(self):
    """Write the gzip member header, storing the original file name.

    The stored name is the basename of ``self.name`` with its compressed
    suffix mapped back: ``.gz`` is dropped, ``.tgz`` becomes ``.tar`` and
    ``.wrz`` becomes ``.wrl``.  The current time is written as the
    modification time.  Nothing is returned; all output goes to
    ``self.fileobj``.
    """
    out = self.fileobj
    out.write('\037\213')  # magic header
    out.write('\010')      # compression method

    fname = os.path.basename(self.name)
    # Only the first matching suffix is rewritten.
    for suffix, restored in ((".gz", ""), (".tgz", ".tar"), (".wrz", ".wrl")):
        if fname.endswith(suffix):
            fname = fname[:-len(suffix)] + restored
            break

    # The FNAME flag announces that a zero-terminated name follows.
    out.write(chr(gzip.FNAME if fname else 0))
    gzip.write32u(out, int(time()))
    out.write('\002')
    out.write('\377')

    if not fname:
        return
    if sys.platform == "win32":
        # Windows is case insensitive by default (although it can be
        # set to case sensitive), so according to the GZIP spec, we
        # force the name to lowercase
        fname = fname.lower()
    # Characters outside ISO-8859-1 come back from encode() as "?", which
    # is then turned into "_".
    encoded = fname.encode("ISO-8859-1", "replace")
    out.write(encoded.replace("?", "_") + '\000')
def spider_closed(self, spider, reason):
    """Finalize the spider's gzip outputs and e-mail a job report.

    Records the finish time and elapsed time on ``self``, writes the gzip
    trailer (remaining compressed data, CRC, size modulo 2**32) to every
    entry in ``self.files``, and sends an HTML summary mail whose subject
    says "failed" when the stats contain downloader exceptions or logged
    errors and "succeed" otherwise.

    ``reason`` is accepted but not used.
    """
    jira_id = spider.custom_settings['JIRA_ID']
    self.finish_time = datetime.datetime.now()
    self.used_time = self.finish_time - self.start_time

    files = []
    for name, compressed in self.files.items():
        # Finish the gzip member: flush the compressor, then CRC and size.
        compressed.fileobj.write(compressed.compress.flush())
        gzip.write32u(compressed.fileobj, compressed.crc)
        gzip.write32u(compressed.fileobj, compressed.size & 0xffffffff)
        attachment = (name + compressed.extension, compressed.mimetype,
                      compressed)
        files.append(attachment)

    try:
        size = self.files[spider.name + '-items.json'].size
    except KeyError:
        size = 0

    stats = spider.crawler.stats.get_stats()
    # pop() removes the entry from the stats dict itself before rendering.
    dqr_status = stats.pop('columns_stats_information', {})

    failure_counters = ('downloader/exception_count', 'log_count/ERROR')
    if any(stats.get(counter, 0) > 0 for counter in failure_counters):
        subject = "failed"
    else:
        subject = "succeed"

    context = {'stats': stats,
               'dqr_status': dqr_status,
               'jira': jira_id,
               'size': format_size(size)}
    mailsender = MailSender.from_settings(self.settings)
    mailsender.send(
        to=self.settings.getlist('JOB_NOTIFICATION_EMAILS'),
        subject='JIRA ID:{} job ends with {}'.format(jira_id, subject),
        # attachs=files,
        body=Environment().from_string(config.HTML).render(context),
        mimetype='text/html',
        _callback=self._catch_mail_sent)
def _write_gzip_header(self):
    """Write a fixed gzip header: no flags, no file name, zero timestamp.

    Because nothing in the header varies, the same input always yields the
    same header bytes.  Output goes to ``self.fileobj``; nothing is returned.
    """
    self.fileobj.write('\037\213')  # magic header
    self.fileobj.write('\010')      # compression method
    # no flags
    self.fileobj.write('\x00')
    # A plain 0 replaces the Python-2-only ``LongType(0)``; assuming
    # ``write32u`` is gzip's helper, it packs to the same four zero bytes.
    write32u(self.fileobj, 0)
    self.fileobj.write('\002')
    self.fileobj.write('\377')
def _write_gzip_header(self):
    """Write a constant gzip header with no flags and a zero timestamp.

    No file name is stored.  Output goes to ``self.fileobj``; nothing is
    returned.
    """
    self.fileobj.write('\037\213')  # magic header
    self.fileobj.write('\010')      # compression method
    # no flags
    self.fileobj.write('\x00')
    # ``long`` exists only on Python 2; a literal 0 is the same value.
    write32u(self.fileobj, 0)
    self.fileobj.write('\002')
    self.fileobj.write('\377')
def _write_gzip_header(self):
    """Write a constant gzip header with no name and no timestamp.

    Output goes to ``self.fileobj``; nothing is returned.
    """
    # Generate a header that is easily reproduced with gzip -9 -n on
    # an unix-like system
    self.fileobj.write('\037\213')  # magic header
    self.fileobj.write('\010')      # compression method
    self.fileobj.write('\000')      # flags
    # Literal 0 instead of the Python-2-only ``long(0)``; same value.
    write32u(self.fileobj, 0)       # timestamp
    self.fileobj.write('\002')      # max compression
    self.fileobj.write('\003')      # UNIX
def _write_gzip_header(self):
    """Write the gzip header using ``self.timestamp`` as modification time.

    The stored name is ``self.filename`` minus its last three characters
    (the slice is unconditional, so the name is assumed to end in ``.gz``).
    Output goes to ``self.fileobj``; nothing is returned.
    """
    self.fileobj.write('\037\213')  # magic header
    self.fileobj.write('\010')      # compression method
    fname = self.filename[:-3]
    flags = 0
    if fname:
        flags = gzip.FNAME
    self.fileobj.write(chr(flags))
    # ``int`` replaces the Python-2-only ``long``; the truncated value that
    # gets packed is the same.
    gzip.write32u(self.fileobj, int(self.timestamp))
    self.fileobj.write('\002')
    self.fileobj.write('\377')
    if fname:
        self.fileobj.write(fname + '\000')
def _write_gzip_header(self):
    """Write a gzip header with a zero timestamp and the stored file name.

    The name is ``self.filename`` without its last three characters; when
    that leaves an empty string, no name and no FNAME flag are written.
    Output goes to ``self.fileobj``; nothing is returned.
    """
    out = self.fileobj
    out.write('\037\213')  # magic header
    out.write('\010')      # compression method

    fname = self.filename[:-3]
    out.write(chr(FNAME if fname else 0))
    write32u(out, 0)
    out.write('\002')
    out.write('\377')

    if not fname:
        return
    out.write(fname + '\000')
def _write_gzip_header(self):
    """Write a gzip header whose timestamp field is always zero.

    The stored name is ``self.filename`` minus its last three characters.
    Output goes to ``self.fileobj``; nothing is returned.
    """
    self.fileobj.write('\037\213')  # magic header
    self.fileobj.write('\010')      # compression method
    fname = self.filename[:-3]
    flags = 0
    if fname:
        flags = gzip.FNAME
    self.fileobj.write(chr(flags))
    ## This is what WebOb patches:
    # (a literal 0 replaces the Python-2-only ``long(0)``; same value)
    gzip.write32u(self.fileobj, 0)
    self.fileobj.write('\002')
    self.fileobj.write('\377')
    if fname:
        self.fileobj.write(fname + '\000')
def _write_gzip_header(self):
    """Write the gzip header using ``self.timestamp`` and the full name.

    The name comes from ``self.name`` when that attribute exists and is
    truthy, otherwise from ``self.filename``; it is stored as-is, with no
    suffix stripped.  Output goes to ``self.fileobj``; nothing is returned.
    """
    self.fileobj.write('\037\213')  # magic header
    self.fileobj.write('\010')      # compression method
    # Python 2.6 deprecates self.filename
    fname = getattr(self, 'name', None) or self.filename
    flags = 0
    if fname:
        flags = gzip.FNAME
    self.fileobj.write(chr(flags))
    # ``int`` replaces the Python-2-only ``long``; same packed value.
    gzip.write32u(self.fileobj, int(self.timestamp))
    self.fileobj.write('\002')
    self.fileobj.write('\377')
    if fname:
        self.fileobj.write(fname + '\000')
def _write_gzip_header(self):
    """Write a gzip header with a zero timestamp and the stored file name.

    The stored name is ``self.filename`` minus its last three characters.
    Output goes to ``self.fileobj``; nothing is returned.
    """
    self.fileobj.write('\037\213')  # magic header
    self.fileobj.write('\010')      # compression method
    fname = self.filename[:-3]
    flags = 0
    if fname:
        flags = FNAME
    self.fileobj.write(chr(flags))
    # don't use current time!
    # (``0`` rather than ``0L``: the long-literal suffix is a syntax error
    # on Python 3 and the value is identical.)
    write32u(self.fileobj, 0)
    self.fileobj.write('\002')
    self.fileobj.write('\377')
    if fname:
        self.fileobj.write(fname + '\000')
def close_member(self):
    """Closes the current member being written.

    Flushes the compressor, writes the CRC and the size (modulo 2**32) as
    the member trailer, then resets the size counter and installs a fresh
    raw-deflate compressor so a new member can be started.  Does nothing
    when no member has been started.
    """
    # The new member is not yet started, no need to close
    if self._new_member:
        return

    self.fileobj.write(self.compress.flush())
    write32u(self.fileobj, self.crc)
    # self.size may exceed 2GB, or even 4GB
    # (mask written without the ``L`` suffix, which Python 3 rejects)
    write32u(self.fileobj, self.size & 0xFFFFFFFF)
    self.size = 0
    self.compress = zlib.compressobj(9,
                                     zlib.DEFLATED,
                                     -zlib.MAX_WBITS,
                                     zlib.DEF_MEM_LEVEL,
                                     0)
    self._new_member = True
def close(self):
    """Write the gzip trailer (CRC and size) and release the file objects.

    Flushes ``self.fileobj``, appends ``self.crc`` and ``self.size`` modulo
    2**32, clears ``self.fileobj`` and closes ``self.myfileobj`` when one is
    set.  The compressor itself is not flushed here.  Calling this again
    after the file object has been cleared is a no-op.
    """
    if self.fileobj is None:
        return
    self.fileobj.flush()
    # What if the CRC is bad or we write it in the wrong place?
    # There is no way to know this until we decompress, and then it could
    # be too late and we may end up with a corrupted archive.
    # Make sure the unit tests always pass.
    gzip.write32u(self.fileobj, self.crc)
    # Mask written without the ``L`` suffix, which Python 3 rejects.
    gzip.write32u(self.fileobj, self.size & 0xffffffff)
    self.fileobj = None
    if self.myfileobj:
        self.myfileobj.close()
        self.myfileobj = None
def _write_gzip_header(self):
    """Write the gzip header using ``self.timestamp`` and ``self.name``.

    A trailing ``.gz`` is removed from the name before it is stored; a
    missing or empty name results in a header without the FNAME field.
    Output goes to ``self.fileobj``; nothing is returned.
    """
    self.fileobj.write("\037\213")  # magic header
    self.fileobj.write("\010")  # compression method
    fname = self.name
    if fname and fname.endswith(".gz"):
        fname = fname[:-3]
    flags = 0
    if fname:
        flags = gzip.FNAME
    self.fileobj.write(chr(flags))
    # ``int`` replaces the Python-2-only ``long`` (same packed value), so
    # no lint suppression is needed for an undefined name.
    gzip.write32u(self.fileobj, int(self.timestamp))
    self.fileobj.write("\002")
    self.fileobj.write("\377")
    if fname:
        self.fileobj.write(fname + "\000")
def close_member(self):
    """Finish the gzip member currently being written, if any.

    Emits whatever the compressor still holds, followed by the CRC and the
    size truncated to 32 bits, then resets state (size counter, a fresh
    raw-deflate compressor, ``_new_member`` flag) for the next member.
    """
    if self._new_member:
        # Nothing has been started since the last close.
        return

    out = self.fileobj
    out.write(self.compress.flush())
    write32u(out, self.crc)
    # The size field is only 32 bits wide while self.size may exceed
    # 2GB or even 4GB, hence the mask.
    write32u(out, self.size & 0xffffffff)

    self.size = 0
    self.compress = zlib.compressobj(
        9, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)
    self._new_member = True
def _write_gzip_header(self):
    """Write the gzip header using ``self.timestamp`` and ``self.name``.

    A trailing ``.gz`` is dropped from the name; when no name remains the
    flag byte is zero and no name field is written.  Output goes to
    ``self.fileobj``; nothing is returned.
    """
    out = self.fileobj
    out.write('\037\213')  # magic header
    out.write('\010')      # compression method

    fname = self.name
    if fname and fname.endswith('.gz'):
        fname = fname[:-3]

    out.write(pycompat.bytechr(gzip.FNAME if fname else 0))
    gzip.write32u(out, int(self.timestamp))
    out.write('\002')
    out.write('\377')

    if not fname:
        return
    out.write(fname + '\000')
def _write_member_header(self, compressed_size, raw_size):
    """Write a gzip member header carrying an index subfield.

    The header sets FEXTRA and stores one extra subfield holding the total
    size of this member and the uncompressed size, both as 64-bit
    little-endian integers.

    Args:
        compressed_size: number of compressed payload bytes that will
            follow the header.
        raw_size: size of the uncompressed data.

    Returns:
        The total member size in bytes: header, optional file name with its
        terminator, payload and the 8-byte CRC32/ISIZE trailer.
    """
    self.fileobj.write(b'\037\213')  # magic header, 2 bytes
    self.fileobj.write(b'\010')      # compression method, 1 byte

    try:
        # RFC 1952 requires the FNAME field to be Latin-1. Do not
        # include filenames that cannot be represented that way.
        fname = os.path.basename(self.name)
        if not isinstance(fname, bytes):
            fname = fname.encode('latin-1')
        if fname.endswith(b'.gz'):
            fname = fname[:-3]
    except UnicodeEncodeError:
        fname = b''

    flags = FEXTRA
    if fname:
        flags |= FNAME
    self.fileobj.write(chr(flags).encode('latin-1'))  # flags, 1 byte

    mtime = self._write_mtime
    if mtime is None:
        mtime = time.time()
    write32u(self.fileobj, int(mtime))  # modified time, 4 bytes
    self.fileobj.write(b'\002')  # fixed flag (maximum compression), 1 byte
    self.fileobj.write(b'\377')  # OS (unknown), 1 byte

    # write extra flag for indexing
    # XLEN, 20 bytes
    self.fileobj.write(b'\x14\x00')  # extra flag len, 2 bytes
    # EXTRA FLAG FORMAT:
    # +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
    # |SI1|SI2|  LEN  |       MEMBER SIZE (8 Bytes)   |        RAW SIZE (8 Bytes)     |
    # +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
    # SI1, SI2:    Subfield ID, 'IG' (Indexed Gzip file)
    # LEN:         Length of subfield body, always 16 (bytes)
    # MEMBER SIZE: The size of current member
    # RAW SIZE:    Raw text size in uint64 (the 32-bit trailer field cannot
    #              represent a >4GB file)
    self.fileobj.write(SID)  # subfield ID (IG), 2 bytes
    # LEN: 16 bytes
    self.fileobj.write(b'\x10\x00')  # subfield len (16), 2 bytes

    # Fixed header incl. extra field (32) + data + CRC32 and ISIZE (8).
    member_size = 32 + compressed_size + 8
    if fname:
        # The name and its zero terminator are only written -- and must
        # only be counted -- when a name is actually present.
        member_size += len(fname) + 1
    self.fileobj.write(struct.pack("<Q", member_size))  # member size, 8 bytes
    self.fileobj.write(struct.pack("<Q", raw_size))     # raw data size, 8 bytes

    if fname:
        self.fileobj.write(fname + b'\000')
    return member_size
def _write_gzip_header(self):
    """Write the gzip header using ``self.timestamp`` and the file name.

    The name is taken from ``self.name`` when that attribute exists and is
    truthy, otherwise from ``self.filename``; a trailing ``.gz`` is removed
    before it is stored.  Output goes to ``self.fileobj``; nothing is
    returned.
    """
    self.fileobj.write("\037\213")  # magic header
    self.fileobj.write("\010")  # compression method
    # Python 2.6 deprecates self.filename
    fname = getattr(self, "name", None) or self.filename
    if fname and fname.endswith(".gz"):
        fname = fname[:-3]
    flags = 0
    if fname:
        flags = gzip.FNAME
    self.fileobj.write(chr(flags))
    # ``int`` replaces the Python-2-only ``long``; same packed value.
    gzip.write32u(self.fileobj, int(self.timestamp))
    self.fileobj.write("\002")
    self.fileobj.write("\377")
    if fname:
        self.fileobj.write(fname + "\000")
def _write_member_header(self, compressed_size, raw_size):
    """Write a gzip member header with a 4-byte member-size subfield.

    Args:
        compressed_size: number of compressed payload bytes that will
            follow the header.
        raw_size: accepted but not used by this variant.

    Returns:
        Total member size in bytes: header, optional zero-terminated file
        name, payload, and the 8-byte CRC32/ISIZE trailer.
    """
    out = self.fileobj
    out.write(b'\037\213')  # magic header, 2 bytes
    out.write(b'\010')      # compression method, 1 byte

    try:
        # RFC 1952 requires the FNAME field to be Latin-1; names that
        # cannot be encoded that way are left out entirely.
        fname = os.path.basename(self.name)
        if not isinstance(fname, bytes):
            fname = fname.encode('latin-1')
        if fname.endswith(b'.gz'):
            fname = fname[:-3]
    except UnicodeEncodeError:
        fname = b''

    flags = (FEXTRA | FNAME) if fname else FEXTRA
    out.write(chr(flags).encode('latin-1'))  # flags, 1 byte

    mtime = self._write_mtime
    if mtime is None:
        mtime = time.time()
    write32u(out, int(mtime))  # modified time, 4 bytes
    out.write(b'\002')         # fixed flag (maximum compression), 1 byte
    out.write(b'\377')         # OS (unknown), 1 byte

    # Extra field used for indexing (XLEN = 8 bytes):
    # +---+---+---+---+---+---+---+---+
    # |SI1|SI2|  LEN  |  MEMBER SIZE  |
    # +---+---+---+---+---+---+---+---+
    # SI1, SI2:    subfield ID, 'IG' (Indexed Gzip file)
    # LEN:         length of the subfield body, always 4 (bytes)
    # MEMBER SIZE: size of the current member
    out.write(b'\x08\x00')  # extra flag len, 2 bytes
    out.write(SID)          # subfield ID (IG), 2 bytes
    out.write(b'\x04\x00')  # subfield len (4), 2 bytes

    # 20 header bytes (16 + 4 for the member size), the file name with its
    # zero terminator when present, the data block, then CRC32 and ISIZE.
    name_bytes = len(fname) + 1 if fname else 0
    member_size = 20 + name_bytes + compressed_size + 8
    out.write(struct.pack("<I", member_size))  # member size, 4 bytes

    if fname:
        out.write(fname + b'\000')
    return member_size
def close(self):
    """Finish the gzip stream and release the underlying file objects.

    In write mode the compressor is flushed with ``Z_FINISH`` and the CRC
    plus the 32-bit size are appended before the output is flushed.  Any
    file object held in ``self.myfileobj`` is closed even if writing the
    trailer raises.  A second call is a no-op.
    """
    out = self.fileobj
    if out is None:
        return
    # Detach first so a repeated close() returns early.
    self.fileobj = None
    try:
        if self.mode == gzip.WRITE:
            remaining = self.compress.flush(Z_FINISH)
            out.write(remaining)
            gzip.write32u(out, self.crc)
            # The size field is 32 bits wide; self.size may exceed 2GB,
            # or even 4GB.
            gzip.write32u(out, self.size & 0xffffffff)
            out.flush()
    finally:
        owned = self.myfileobj
        if owned:
            self.myfileobj = None
            owned.close()
def close(self):
    """Finish the gzip stream and release the underlying file objects.

    In write mode the compressor is flushed with ``Z_FINISH``, then the CRC
    and the size modulo 2**32 are written and the output is flushed.
    ``self.myfileobj`` is closed in all cases, including when writing the
    trailer raises.  Calling this again after the first close does nothing.
    """
    fileobj = self.fileobj
    if fileobj is None:
        return
    self.fileobj = None
    try:
        if self.mode == gzip.WRITE:
            fileobj.write(self.compress.flush(Z_FINISH))
            gzip.write32u(fileobj, self.crc)
            # self.size may exceed 2GB, or even 4GB
            # (mask written without the ``L`` suffix, which is a syntax
            # error on Python 3; the value is unchanged)
            gzip.write32u(fileobj, self.size & 0xffffffff)
            fileobj.flush()
    finally:
        myfileobj = self.myfileobj
        if myfileobj:
            self.myfileobj = None
            myfileobj.close()
def _write_member(self, cdata):
    """Write one block of compressed data as a complete gzip member.

    Input:
        cdata: tuple produced by _compress_func(); items 0-2 are the
            compressed chunks (buffer, body, rest), item 3 is the CRC32 and
            item 4 is the raw (uncompressed) size.
    Return:
        size of the member, as reported by _write_member_header()
    """
    buffer_part, body_part, rest_part, crc, raw_size = cdata[:5]
    compressed_len = len(buffer_part) + len(body_part) + len(rest_part)
    member_size = self._write_member_header(compressed_len, raw_size)

    out = self.fileobj
    for chunk in (buffer_part, body_part, rest_part):
        out.write(chunk)

    write32u(out, crc)                    # CRC32
    write32u(out, raw_size & 0xffffffff)  # raw data size in 32 bits
    return member_size
def _write_gzip_header(self):
    """Write a gzip header whose timestamp field is always zero.

    When the object has a ``name`` attribute (Python 2.6+), it is used with
    a trailing ``.gz`` removed; otherwise ``self.filename`` minus its last
    three characters is used.  Output goes to ``self.fileobj``; nothing is
    returned.
    """
    self.fileobj.write('\x1f\x8b')  # magic header
    self.fileobj.write('\x08')      # compression method
    if hasattr(self, 'name'):
        # 2.6
        fname = self.name
        if fname.endswith(".gz"):
            fname = fname[:-3]
    else:
        fname = self.filename[:-3]
    flags = 0
    if fname:
        flags = gzip.FNAME
    self.fileobj.write(chr(flags))
    ## This is what WebOb patches:
    # (a literal 0 replaces the Python-2-only ``long(0)``; same value)
    gzip.write32u(self.fileobj, 0)
    self.fileobj.write('\x02\xff')
    if fname:
        self.fileobj.write(fname + '\x00')
def _write_gzip_header(self):
    """Write the gzip header using ``self.timestamp`` and the file name.

    ``self.name`` is preferred; ``self.filename`` is used when the object
    has no ``name`` attribute.  A trailing ``.gz`` is removed before the
    name is stored.  Output goes to ``self.fileobj``; nothing is returned.
    """
    self.fileobj.write('\037\213')  # magic header
    self.fileobj.write('\010')      # compression method
    # Python 2.6 introduced self.name and deprecated self.filename
    try:
        fname = self.name
    except AttributeError:
        fname = self.filename
    if fname and fname.endswith('.gz'):
        fname = fname[:-3]
    flags = 0
    if fname:
        flags = gzip.FNAME
    self.fileobj.write(chr(flags))
    # ``int`` replaces the Python-2-only ``long``; same packed value.
    gzip.write32u(self.fileobj, int(self.timestamp))
    self.fileobj.write('\002')
    self.fileobj.write('\377')
    if fname:
        self.fileobj.write(fname + '\000')
def update_event(self, inp=-1):
    """Call ``gzip.write32u`` with inputs 0 and 1 and publish the result.

    Input 0 is passed as the output object and input 1 as the value; the
    call's return value is set on output 0.  ``inp`` is not used.
    """
    target = self.input(0)
    value = self.input(1)
    result = gzip.write32u(target, value)
    self.set_output_val(0, result)