def unexpected_tag(self, tag, tagName):
    """Report a marker that shows up where it is not expected and skip it.

    The segment length is obtained from ``self.read_uint16()``, the marker
    is logged at EXTRA_DATA level, and ``length - 2`` further bytes are
    read from ``self.fileObject`` and discarded.  Nothing is returned.
    """
    segmentLength = self.read_uint16()
    # The position is sampled after read_uint16() has run; 2 is subtracted
    # so the logged offset is that of the length field (presumably 2 bytes).
    lengthFieldPos = self.fileObject.cur() - 2
    message = '[0x%x] tag %s(%s) appears unexpected, length: %d.' % (
        lengthFieldPos, tagName, tag.encode('hex'), segmentLength)
    LOGGER.log(CustomLoggingLevel.EXTRA_DATA, message)
    self.fileObject.read(segmentLength - 2)
def read_scandata(self):
    """Collect the entropy-coded scan data up to the EOI marker.

    Everything from the current position to the end of the file is read in
    a single call and then copied byte by byte into ``self.scanData``.  A
    0xFF byte is stored together with the byte that follows it, unless the
    pair is the EOI marker (0xFFD9), in which case ``self.tag_eoi`` is
    called.  The loop runs while ``self.scanFlag`` is True, so ``tag_eoi``
    is expected to clear that flag.  Bytes left after the loop are handed
    to ``showextradata``.

    Sets ``self.scanDataPos`` and ``self.scanDataLength``.  If the data
    runs out before EOI is seen, an error is logged instead of raising
    IndexError.
    """
    curPos = self.fileObject.cur()
    self.scanDataPos = curPos
    LOGGER.log(CustomLoggingLevel.IMAGE_DEBUG, 'Start to read scan data.')
    # read all data to improve process speed
    tmpdata = self.fileObject.read(self.fileObject.size - curPos)
    total = len(tmpdata)
    index = 0
    eoiFound = False
    # Bounding the index keeps a truncated file (no EOI) from running off
    # the end of the buffer.
    while self.scanFlag == True and index < total:
        # A 0xFF in the very last position has no partner byte and is
        # stored as ordinary data by the else branch.
        if tmpdata[index] == '\xff' and index + 1 < total:
            if tmpdata[index + 1] == '\xd9':
                eoiFound = True
                self.tag_eoi('\xff\xd9')
            else:
                self.scanData.append(tmpdata[index])
                self.scanData.append(tmpdata[index + 1])
            index += 2
        else:
            self.scanData.append(tmpdata[index])
            index += 1
    if not eoiFound:
        LOGGER.error('[0x%x] End of image tag 0xFFD9 not found in scan data.'
                     % (curPos + index))
    if index < total:
        self.showextradata(tmpdata[index:], curPos + index)
    self.scanDataLength = len(self.scanData)
    LOGGER.log(
        CustomLoggingLevel.IMAGE_INFO,
        'Scan data start at 0x%x, length: 0x%x.' %
        (curPos, self.scanDataLength))
def showextradata(self, data, location):
    """Log extra bytes found at offset ``location``.

    Data of at most 128 items is logged verbatim.  Longer data is wrapped
    in a ``FileObject`` and only the result of its ``type()`` call is
    logged.
    """
    if len(data) <= 128:
        LOGGER.log(CustomLoggingLevel.EXTRA_DATA,
                   '[0x%x] > %s' % (location, data))
        return
    wrapped = FileObject(data)
    description = wrapped.type()
    LOGGER.log(CustomLoggingLevel.EXTRA_DATA,
               '[0x%x] %s' % (location, description))
def tag_dri(self, tag):
    """Handle marker 0xFFDD (Define Restart Interval).

    Stores the 16-bit interval in ``self.restartInterval``.  When the
    payload length (segment length minus 2) is anything other than 2,
    ``payload length - 2`` more bytes are read and logged as extra data.

    Returns whatever ``self.find_tag('DRI')`` returns.
    """
    payloadLength = self.read_uint16() - 2
    # Formatted before the interval is read so the log points at the
    # start of the payload.
    position = '[0x%x]' % self.fileObject.cur()
    self.restartInterval = self.read_uint16()
    if payloadLength != 2:
        surplus = self.fileObject.read(payloadLength - 2)
        LOGGER.log(CustomLoggingLevel.EXTRA_DATA,
                   '%s> %s' % (position, surplus))
    return self.find_tag('DRI')
def clean_bitstream_remainder(self):
    """Discard the first byte of the bit-stream buffer.

    The first buffered value is masked with
    ``myBitStreamMaskR[8 - self.bitStreamStart]``.  A result that is
    neither zero nor equal to the mask itself is logged as an unusual end
    of bitstream.  In every case the first value is then removed from
    ``self.streamBuffer`` and ``self.bitStreamStart`` is reset to 0.
    """
    head = self.streamBuffer[0]
    fillMask = myBitStreamMaskR[8 - self.bitStreamStart]
    remainder = head & fillMask
    if remainder not in (0, fillMask):
        LOGGER.log(
            CustomLoggingLevel.EXTRA_DATA,
            '?0x%x? Unsual end of bitstream, is %s. (0x%s)' %
            (self.scanDataIndex, bin(remainder), head))
    # remove() drops the first element equal to head, i.e. element 0.
    self.streamBuffer.remove(head)
    self.bitStreamStart = 0
def find_tag(self, tagName):
    """Return the next two-byte marker, resynchronising when needed.

    The next byte is expected to be 0xFF.  If it is not, an error is
    logged, bytes are skipped until a 0xFF turns up, and every skipped
    byte (including the one that failed the check) is logged as extra
    data.  The reported offset is that of the first skipped byte.

    tagName -- name of the segment just parsed; used in the error text.

    Returns '\\xff' followed by the byte read after the 0xFF.
    """
    first = self.fileObject.read(1)
    if first != '\xFF':
        # cur() already points past the byte just read, hence the "- 1".
        curPos = '[0x%x]' % (self.fileObject.cur() - 1)
        LOGGER.error('%s Can\'t find 0xFF in end of %s.' % (curPos, tagName))
        # The byte that failed the check belongs to the skipped data too.
        data = [first]
        d = self.fileObject.read(1)
        # An empty read is treated as end of data so the loop cannot spin
        # forever.  Assumes read() returns an empty string there, like
        # file.read() does; the check is harmless otherwise.
        while d and d != '\xFF':
            data.append(d)
            d = self.fileObject.read(1)
        LOGGER.log(CustomLoggingLevel.EXTRA_DATA,
                   '%s> %s' % (curPos, ''.join(data)))
    return '\xff' + self.fileObject.read(1)
def tag_app(self, tag):
    """Handle an application-specific marker (0xFFE1 ~ 0xFFEE).

    The payload is read and discarded; only the presence of the segment is
    logged.  APP1, APP2, APP13 and APP14 are logged plainly, any other ID
    is flagged as unusual.  (The original test was inverted, so exactly
    those four IDs were the ones reported as "usually not used".)

    tag -- the two-byte marker; the APP number is derived from it.

    Returns whatever ``self.find_tag`` returns.
    """
    appID = (ord(tag[0]) << 8) + ord(tag[1]) - 0xFFE0
    length = self.read_uint16() - 2
    # The payload itself is not interpreted; reading it moves past it.
    self.fileObject.read(length)
    payloadPos = self.fileObject.cur() - length
    if appID in (1, 2, 13, 14):
        LOGGER.log(
            CustomLoggingLevel.OTHER_DATA,
            '[0x%x] Tag APP%d found.' % (payloadPos, appID))
    else:
        LOGGER.log(
            CustomLoggingLevel.OTHER_DATA,
            '[0x%x] Tag APP%d found, this tag usually not used in file.' %
            (payloadPos, appID))
    return self.find_tag('APP%d' % appID)
def start(self):
    """Walk the JPEG marker segments that precede the scan data.

    After checking the 0xFFD8 start mark, each two-byte marker is passed
    to its handler from ``self.tagMap``; the handler's return value is the
    next marker.  The walk stops when ``self.scanFlag`` is no longer False
    or a handler returns None.  A summary line is logged afterwards.
    """
    if self.fileObject.read(2) != '\xff\xd8':
        LOGGER.error('JPEG file start mark 0xFFD8 check failed.')
        return
    # start of JPEG file
    marker = self.fileObject.read(2)
    while self.scanFlag == False and marker is not None:
        try:
            marker = self.tagMap[marker](marker)
        except KeyError:
            # Covers a marker missing from tagMap and also a KeyError
            # raised inside the handler itself.
            marker = self.tag_unknown(marker)
    summary = (self.version >> 8, self.version & 0xff, self.width,
               self.height, self.channel, self.fileObject.size)
    LOGGER.log(
        CustomLoggingLevel.IMAGE_INFO,
        'JPEG (ver %d.%d): %d*%dpx , channel: %d, fileLength: 0x%x b.' %
        summary)
def tag_dht(self, tag):
    """Handle marker 0xFFC4 (Define Huffman Table(s)).

    A segment may hold several tables.  Each starts with one byte whose
    high nibble is the table class and whose low nibble is the table id.
    Class 0 maps to slot ``id``, any other class to slot ``2 + id``.
    Slots 0..3 are decoded with ``self.huffmantree_decode``; anything else
    is logged and the rest of the segment is skipped.

    Returns whatever ``self.find_tag('DHT')`` returns.
    """
    length = self.read_uint16() - 2
    while length > 0:
        tableIDByte = self.fileObject.read_uint8()
        tableID = tableIDByte & 0xf
        if tableIDByte >> 4 != 0:
            # Was "2 + tableIDByte & 0xf", which parses as
            # "(2 + tableIDByte) & 0xf" and wraps ids 14/15 to slots 0/1.
            tableID += 2
        if tableID < 4:
            # presumably huffmantree_decode returns the bytes it consumed;
            # the extra 1 accounts for the id byte read above.
            length -= self.huffmantree_decode(tableID) + 1
        else:
            LOGGER.log(
                CustomLoggingLevel.EXTRA_DATA,
                '[0x%x] Unknown part of huffman table' %
                (self.fileObject.cur() - 1))
            # skip unknown part; the id byte is already consumed, so only
            # length - 1 bytes of this segment remain.
            self.fileObject.read(length - 1)
            break
    return self.find_tag('DHT')
def asc_detect(filename, min_length=5):
    """Log every run of readable ASCII characters found in a file.

    The file is opened through ``FileObject`` and scanned one byte at a
    time.  A run ends at the first byte that is not in the readable set.
    Runs of at least ``min_length`` characters are logged at ASCII_DATA
    level together with the offset at which the run ended.  A run that
    reaches the end of the file is reported as well, with the file size as
    its end offset; the original never reported such a run.

    filename   -- passed straight to ``FileObject``.
    min_length -- shortest run worth reporting.
    """
    LOGGER.log(CustomLoggingLevel.OTHER_DATA, "--- ascii detect start --- ")

    # Built once instead of once per byte.  Same characters as before;
    # note that '-', '=' and '?' are not part of the set.
    readable_chars = frozenset(
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "0123456789"
        "`~!@#$%^&*()_+[]\\{}|;':\",./<>"
        " \n\t\r")

    file_object = FileObject(filename)
    run = []
    for i in xrange(file_object.size):
        byte = file_object.read_uint8()
        if byte < 128 and chr(byte) in readable_chars:
            run.append(chr(byte))
            continue
        if len(run) >= min_length:
            LOGGER.log(CustomLoggingLevel.ASCII_DATA,
                       "[ascii] at pos 0x%x:\n" % i + "".join(run))
        run = []
    # A readable run that lasts until end of file has no terminating
    # byte, so it has to be flushed here.
    if run and len(run) >= min_length:
        LOGGER.log(CustomLoggingLevel.ASCII_DATA,
                   "[ascii] at pos 0x%x:\n" % file_object.size + "".join(run))
    LOGGER.log(CustomLoggingLevel.OTHER_DATA,
               "--- ascii detect finished --- ")
def get_images(self):
    """Decode every parsed frame into an ``Image`` object.

    For each entry of ``self.images`` the LZW data is decoded with
    ``self.lzw_decode`` and every decoded index is looked up in the
    frame's local colour table (when its flag is 1) or in
    ``self.globalColorTable`` otherwise.  A frame whose pixel count does
    not equal width * height is reported in the log but still returned.

    Returns a list of ``Image`` objects with ``w``, ``h`` and ``data`` set.
    """
    result = []
    for image in self.images:
        if image.get("localColorTableFlag") == 1:
            color_table = image["localColorTable"]
        else:
            color_table = self.globalColorTable
        indices = self.lzw_decode(image["data"], image["LZWMinimumCodeSize"])
        cur = Image()
        cur.w = image["width"]
        cur.h = image["height"]
        cur.data = [color_table[i] for i in indices]
        result.append(cur)
        if len(cur.data) != cur.w * cur.h:
            # Was len(self.result): an attribute nothing in the visible
            # code sets; the local list is what is meant.  After the
            # append its length is the 1-based number of this frame.
            LOGGER.log(
                CustomLoggingLevel.OTHER_DATA,
                "image %d has wrong width or height " % len(result))
    return result
def read_tiff_ifd(self, tiffStartPos, p_read_uint16, p_read_uint32,
                  dirEntryPos, tagName):
    """Walk a chain of TIFF image file directories and log every entry.

    tiffStartPos  -- file offset that all TIFF offsets are relative to.
    p_read_uint16 -- reader for 16-bit values; optional position argument.
    p_read_uint32 -- reader for 32-bit values; optional position argument.
                     (presumably both already apply the TIFF byte order)
    dirEntryPos   -- offset of the first directory, relative to
                     tiffStartPos; 0 ends the chain.
    tagName       -- label used in the log output.

    Entries 0x8769 and 0xa005 are followed recursively as sub-directories.
    An entry whose data format or tag is unknown is reported with a
    warning and the walk continues with the next entry.
    """
    dirCount = 0
    while dirEntryPos != 0:
        entryCount = p_read_uint16(tiffStartPos + dirEntryPos)
        LOGGER.log(
            CustomLoggingLevel.IMAGE_DEBUG,
            '[%s] Tiff data start at 0x%x, directory index: %d, start at: 0x%x, entry count: %d.'
            % (tagName, tiffStartPos, dirCount, dirEntryPos, entryCount))
        for i in range(entryCount):
            # Each entry is 12 bytes; the 2 skips the entry count.
            entryPos = tiffStartPos + dirEntryPos + 2 + 12 * i
            try:
                dirTag = p_read_uint16(entryPos)
                dataFormat = p_read_uint16()
                nComponent = p_read_uint32()
                dataLength = nComponent * tiffEnumDataTypeLength[dataFormat]
                if dataLength > 4:
                    # Values longer than 4 bytes live elsewhere; the
                    # entry holds their offset.
                    dataStartPos = p_read_uint32()
                    data = self.fileObject.read(
                        dataLength, tiffStartPos + dataStartPos)
                else:
                    data = self.fileObject.read(4)
                if dirTag == 0x8769:
                    self.read_tiff_ifd(
                        tiffStartPos, p_read_uint16, p_read_uint32,
                        p_read_uint32(entryPos + 8), 'SubExif')
                elif dirTag == 0xa005:
                    self.read_tiff_ifd(
                        tiffStartPos, p_read_uint16, p_read_uint32,
                        p_read_uint32(entryPos + 8), 'ExifInteroperability')
                if dataFormat == 2:
                    LOGGER.log(
                        CustomLoggingLevel.IMAGE_INFO,
                        '[%s - %s](string)> %s' %
                        (tagName, exifEnumTag[dirTag],
                         data.replace('\x00', '')))
                else:
                    LOGGER.log(
                        CustomLoggingLevel.IMAGE_INFO,
                        '[%s - %s](%s)> Hex:%s' %
                        (tagName, exifEnumTag[dirTag],
                         tiffEnumDataType[dataFormat], data.encode('hex')))
            except (KeyError, IndexError):
                # Was "except KeyError or IndexError", which evaluates to
                # "except KeyError" and lets IndexError escape.
                LOGGER.warning(
                    '[0x%x] Unable to decode dataformat or entrytag in tiff data, tagName: %s, dirTag: 0x%x, dataFormat: 0x%x, directory: %d/%d.'
                    % (self.fileObject.cur(), tagName, dirTag, dataFormat,
                       i, entryCount))
        dirCount += 1
        # The offset of the next directory follows the last entry.
        dirEntryPos = p_read_uint32(tiffStartPos + dirEntryPos + 2 +
                                    12 * entryCount)
def tag_soi(self, tag):
    """Log that a start-of-image marker was met while parsing segments.

    Nothing is read from the file and nothing is returned.

    NOTE(review): the comment that used to sit here labelled this marker
    0xFFD0, while ``start`` checks 0xFFD8 as the file start mark; confirm
    which key this handler is registered under in ``tagMap``.
    """
    position = self.fileObject.cur()
    LOGGER.log(CustomLoggingLevel.EXTRA_DATA,
               '[0x%x] A new file start tag found.' % position)
def start(self):
    """Parse the BMP file header, bitmap header and colour palette.

    Sets ``self.version`` (1..4), the image geometry (``width``,
    ``height``, ``channel``, ``bitsPerPixel``), ``headerLength``,
    ``colorPalette`` (4-character strings reordered to R, G, B, reserved),
    ``padding`` and ``rowDataLength``, then logs a summary line.

    All multi-byte fields are unpacked with explicit little-endian,
    standard-size formats.  The native formats used before make 'l'/'L'
    8 bytes wide on LP64 platforms, so unpacking the 4-byte fields failed
    there.
    """
    self.version = struct.unpack('<h', self.fileObject.read(2))[0]
    if self.version == 0:  # ver 1
        # read file header
        self.version = 1
        self.width, self.height, self.rowDataLength = struct.unpack(
            '<3h', self.fileObject.read(6))
        # Unsigned byte values.  The original wrapped the int returned by
        # struct.unpack in ord(), which raises TypeError.
        self.channel = struct.unpack('<B', self.fileObject.read(1))[0]
        self.bitsPerPixel = struct.unpack('<B', self.fileObject.read(1))[0]
        self.headerLength = 10
    elif self.version == 0x4D42:  # ver 2 3 4 ("BM" read little-endian)
        # read file header
        self.length = struct.unpack('<i', self.fileObject.read(4))[0]
        if self.fileObject.read(4) != '\x00\x00\x00\x00':
            # reserved always 0
            LOGGER.log(
                CustomLoggingLevel.OTHER_DATA,
                '[0x%x] File header reserved options is not 0' %
                (self.fileObject.cur() - 4))
        # Offset of the pixel data; read to move on, not used here.
        bitmapOffset = struct.unpack('<i', self.fileObject.read(4))[0]
        # read bitmap header
        bitmapHeaderLength = struct.unpack('<l', self.fileObject.read(4))[0]
        if bitmapHeaderLength == 12:  # ver 2
            self.version = 2
            (self.width, self.height, self.channel,
             self.bitsPerPixel) = struct.unpack('<4h', self.fileObject.read(8))
            self.headerLength = 26
        elif bitmapHeaderLength == 40:  # ver 3
            self.version = 3
            (self.width, self.height, self.channel,
             self.bitsPerPixel) = struct.unpack('<2l2h',
                                                self.fileObject.read(12))
            self.compressionMethod, self.bitmapLength = struct.unpack(
                '<2L', self.fileObject.read(8))
            self.fileObject.read(16)  # skip useless header
            self.headerLength = 53
        elif bitmapHeaderLength == 108:  # ver 4
            self.version = 4
        else:
            LOGGER.error('[0x%x] Unknown BMP file version.' %
                         (self.fileObject.cur() - 4))
        # version still equals the magic value when the header length was
        # not recognised; the palette is skipped in that case.
        if self.version != 0x4D42:
            # calculate number of entries
            if self.bitsPerPixel < 24:
                self.numberOfEntries = 1 << self.bitsPerPixel
            else:
                self.numberOfEntries = 0
            # read color palette
            self.colorPalette = []
            if self.version == 3 and self.compressionMethod == 3:
                self.fileObject.read(12 * self.numberOfEntries)
            else:
                for i in range(self.numberOfEntries):
                    self.headerLength += 4
                    t = self.fileObject.read(4)
                    # Stored as B, G, R, reserved; kept as R, G, B, reserved.
                    self.colorPalette.append(t[2] + t[1] + t[0] + t[3])
                    if t[3] != '\x00':
                        LOGGER.log(
                            CustomLoggingLevel.OTHER_DATA,
                            '[0x%x] Color palette reserved option(alpha channel) is not 0, is 0x%x!'
                            % (self.fileObject.cur() - 4, ord(t[3])))
    else:
        LOGGER.error('Magic value BM check failed.')
    # Rows are padded to a multiple of 32 bits.
    self.padding = self.width * self.bitsPerPixel % 32
    if self.padding != 0:
        self.padding = 32 - self.padding
    # Floor division keeps the result an int on every Python version.
    self.rowDataLength = (self.width * self.bitsPerPixel +
                          self.padding) * self.height // 8
    LOGGER.log(
        CustomLoggingLevel.IMAGE_INFO,
        'BMP(ver %d): %d*%dpx , channel: %d, fileLength: 0x%x(0x%x) b, headerLength: %d b, rowDataLength: %d b'
        % (self.version, self.width, self.height, self.channel,
           self.fileObject.size, self.headerLength + self.rowDataLength,
           self.headerLength, self.rowDataLength))
    if self.channel != 1:
        LOGGER.log(CustomLoggingLevel.IMAGE_INFO,
                   'Warning: bmpfile channel is NOT 1!')
def decode_rgb_data(self, data):
    """Decode uncompressed BMP pixel data into a flat list of pixels.

    data -- string of raw row data; each stored row is padded to a
            multiple of four bytes.

    Depths of 24 and above yield ``[r, g, b]`` (or ``[r, g, b, a]`` for
    32) per pixel.  Palette depths (1, 2, 4, 8) yield the four components
    of the palette entry.  Stored rows are emitted in reverse order, so
    the last stored row comes first in the result.  Padding bytes other
    than 0x00 / 0xFF are reported in the log.

    Returns the list of pixels, or an empty list when the palette depth
    is not supported.

    Fixes over the previous version: a palette row now decodes exactly
    ``width`` pixels (the old loop kept emitting pixels after its end
    flag was set and read one byte too many when the width filled whole
    bytes), and floor division is used so the offsets stay integers.
    """
    width = self.width
    bitsPerPixel = self.bitsPerPixel
    # Bytes per stored row: round up to whole bytes, then to 4 bytes.
    lineLength = (width * bitsPerPixel + 7) // 8
    lineLength = (lineLength + 3) // 4 * 4

    usesPalette = bitsPerPixel < 24
    if usesPalette:
        if bitsPerPixel not in (1, 2, 4, 8):
            LOGGER.error('Unsupported bitsPerPixel %d for palette data.' %
                         bitsPerPixel)
            return []
        pixelsPerByte = 8 // bitsPerPixel
        pixelMask = (1 << bitsPerPixel) - 1

    rows = []
    index = 0
    for j in range(self.height):
        lineData = []
        if not usesPalette:
            for i in range(width):
                if bitsPerPixel == 32:
                    lineData.append([
                        ord(data[index + 2]),
                        ord(data[index + 1]),
                        ord(data[index]),
                        ord(data[index + 3])
                    ])
                    index += 4
                else:  # 24
                    lineData.append([
                        ord(data[index + 2]),
                        ord(data[index + 1]),
                        ord(data[index])
                    ])
                    index += 3
        else:
            # decode rowdata from color palette, leftmost pixel in the
            # highest bits of each byte
            while len(lineData) < width:
                d = ord(data[index])
                index += 1
                for k in range(pixelsPerByte - 1, -1, -1):
                    if len(lineData) == width:
                        break
                    colorPalette = self.colorPalette[
                        (d >> (k * bitsPerPixel)) & pixelMask]
                    lineData.append([
                        ord(colorPalette[0]),
                        ord(colorPalette[1]),
                        ord(colorPalette[2]),
                        ord(colorPalette[3])
                    ])
        rows.append(lineData)
        rowEnd = (j + 1) * lineLength
        # Whatever lies between the last pixel and the row end is padding.
        appendData = data[index:rowEnd]
        for c in appendData:
            if c != '\x00' and c != '\xff':
                LOGGER.log(
                    CustomLoggingLevel.OTHER_DATA,
                    '[0x%x]Unsual append data: 0x%s' %
                    (self.headerLength + index, appendData.encode('hex')))
                break
        index = rowEnd

    # Same order as prepending every row, without the quadratic copying.
    rowData = []
    for lineData in reversed(rows):
        rowData.extend(lineData)
    return rowData
def decode_scandata(self):
    """Decode the scan data into a flat, row-major list of [r, g, b].

    Sampling factors are taken from ``self.colorQuantization`` for
    components 1, 2 and 3 (named Y, Cr and Cb here).  Only images whose
    two chroma components use 1x1 sampling are supported; otherwise an
    error is logged and the zero-filled pixel list is returned.

    Each MCU from ``self.read_mcu_block`` holds ``horzY * vertY``
    luminance blocks and one block per chroma component.  A pixel takes
    its chroma sample from the position that covers it, i.e. its MCU
    coordinates divided by the luminance sampling factors.  The previous
    version indexed the chroma block with the in-block coordinates, which
    is only right when both factors are 1.

    Leftover bytes after the last MCU are passed to ``showextradata``.
    """
    # init decode variable
    horzY = self.colorQuantization[1]['Horz']
    horzCr = self.colorQuantization[2]['Horz']
    horzCb = self.colorQuantization[3]['Horz']
    vertY = self.colorQuantization[1]['Vert']
    vertCr = self.colorQuantization[2]['Vert']
    vertCb = self.colorQuantization[3]['Vert']
    scanY = horzY * vertY
    scanCr = horzCr * vertCr
    scanCb = horzCb * vertCb
    LOGGER.log(
        CustomLoggingLevel.IMAGE_DEBUG,
        "scanY: %d, scanCr: %d, scanCb: %d." % (scanY, scanCr, scanCb))
    self.baseY = 0
    self.baseCr = 0
    self.baseCb = 0
    rowData = [[0, 0, 0] for _ in range(self.width * self.height)]
    if vertCb != 1 or vertCr != 1 or horzCb != 1 or horzCr != 1:
        LOGGER.error(
            'Error in decode scan data, ONLY support vertCb==vertCr==horzCb==horzCr==1!'
        )
        return rowData

    LOGGER.log(CustomLoggingLevel.IMAGE_DEBUG, 'Start to decode scan data.')
    # calc block number (rounded up; floor division keeps them integers)
    mcuWidth = horzY * 8
    mcuHeight = vertY * 8
    hBlock = (self.width + mcuWidth - 1) // mcuWidth
    vBlock = (self.height + mcuHeight - 1) // mcuHeight
    for vb in range(vBlock):
        for hb in range(hBlock):
            [dataY, dataCr, dataCb] = self.read_mcu_block(
                scanY, scanCr, scanCb, [], [], [])
            for i in range(vertY):
                heightIndex = (vb * vertY + i) * 8
                for j in range(horzY):
                    widthIndex = (hb * horzY + j) * 8
                    for k in range(8):
                        if heightIndex + k >= self.height:
                            break  # rest of the block is below the image
                        chromaRow = ((i * 8 + k) // vertY) * 8
                        for l in range(8):
                            if widthIndex + l >= self.width:
                                break  # rest of the row is off the image
                            chromaIndex = chromaRow + (j * 8 + l) // horzY
                            y = dataY[i * horzY + j][k * 8 + l]
                            cr = dataCr[0][chromaIndex]
                            cb = dataCb[0][chromaIndex]
                            r = self.round(y + 1.402 * cr + 128)
                            b = self.round(y + 1.772 * cb + 128)
                            g = self.round(y - 0.34414 * cb -
                                           0.71414 * cr + 128)
                            rowData[(heightIndex + k) * self.width +
                                    widthIndex + l] = [r, g, b]
        LOGGER.info('Please wait, decoding ... (%d/%d)' % (vb, vBlock))
    self.clean_bitstream_remainder()
    if self.scanDataIndex < self.scanDataLength:
        self.showextradata(''.join(self.scanData[self.scanDataIndex:]),
                           self.scanDataPos + self.scanDataIndex)
    return rowData
def reserve_tag(self):
    """Log the payload of a reserved segment as possible hidden data.

    The position is sampled first, then the segment length is read and
    ``length - 2`` bytes are consumed and logged at STEGO_DATA level.

    Returns whatever ``self.find_tag('RESERVED TAG')`` returns.
    """
    position = '[0x%x]' % self.fileObject.cur()
    payloadLength = self.read_uint16() - 2
    payload = self.fileObject.read(payloadLength)
    LOGGER.log(CustomLoggingLevel.STEGO_DATA, '%s> %s' % (position, payload))
    return self.find_tag('RESERVED TAG')
def __init__(self, file_object):
    """Parse a whole GIF stream from ``file_object``.

    Reads the signature, the logical screen descriptor and the global
    colour table, then loops over the blocks until the trailer (0x3b).
    Image descriptors (0x2c) and extensions (0x21) are collected into one
    dict per frame.  Any other introducer byte is taken as the LZW minimum
    code size of the frame's data, which completes the frame and appends
    it to ``self.images``.

    Fixes over the previous version:
    * the "out of logic screen" message applied ``%`` before ``+ 1`` and
      raised TypeError;
    * the background colour index and the byte after it were read only
      for "89a", although the screen descriptor always holds both bytes;
    * the comment / plain-text loops stop on an empty read.
    """
    # file_object.addHandler(logging.StreamHandler())
    self.fileObject = file_object
    if file_object.read(3) != 'GIF':
        LOGGER.error("File is not a gif file")
    self.type = "GIF"
    self.version = file_object.read(3)
    if self.version != '87a' and self.version != '89a':
        LOGGER.log(CustomLoggingLevel.OTHER_DATA, "Invalid version")
    else:
        LOGGER.log(CustomLoggingLevel.BASIC_DEBUG,
                   "version is " + self.version)
    self.logicScreenWidth = file_object.read_uint16()
    self.logicScreenHeight = file_object.read_uint16()
    # Packed byte, lowest bits first: table size (3), sort flag (1),
    # colour resolution (3), global colour table flag (1).
    mask = file_object.read_uint8()
    self.pixel = mask & 0b111
    mask >>= 3
    self.sortFlag = mask & 0b1
    mask >>= 1
    self.colorResolution = mask & 0b111
    mask >>= 3
    self.globalColorTableFlag = mask & 0b1
    # Both bytes belong to the descriptor in 87a as well as in 89a, so
    # they are consumed regardless of the version.
    self.backgroundColorIndex = file_object.read_uint8()
    self.pixelAspectRatio = file_object.read_uint8()
    # self.globalColorTable = [[0, 0, 0]] * (2 ** (self.pixel + 1))
    if self.globalColorTableFlag:
        self.globalColorTable = [[0, 0, 0]
                                 for _ in range(2**(self.pixel + 1))]
    else:
        self.globalColorTable = []
    LOGGER.log(CustomLoggingLevel.OTHER_DATA,
               "global table size is %d" % len(self.globalColorTable))
    for i in range(len(self.globalColorTable)):
        for j in range(3):  # 0 red 1 green 2 blue
            self.globalColorTable[i][j] = file_object.read_uint8()
    self.images = []
    # Collects everything that belongs to the frame being read; replaced
    # by a fresh dict once the frame's data has been stored.
    image = {}
    while True:
        tag = file_object.read_uint8()
        if tag == 0x3b:
            LOGGER.log(CustomLoggingLevel.OTHER_DATA, "gif end")
            break  # end of gif
        if tag == 0x2c:  # start of a image descriptor
            # LOGGER.info("image descriptor")
            LOGGER.log(CustomLoggingLevel.IMAGE_DEBUG, "image descriptor")
            image["xOffset"] = file_object.read_uint16()
            image["yOffset"] = file_object.read_uint16()
            image["width"] = file_object.read_uint16()
            image["height"] = file_object.read_uint16()
            if image["xOffset"] + image["width"] > self.logicScreenWidth or \
                    image["yOffset"] + image["height"] > self.logicScreenHeight:
                # Parenthesised: "%" binds tighter than "+".
                LOGGER.log(
                    CustomLoggingLevel.OTHER_DATA,
                    "some part out of logic screen at image %d" %
                    (len(self.images) + 1))
            mask = file_object.read_uint8()
            image["pixel"] = mask & 0b111
            mask >>= 3
            image["reserved"] = mask & 0b11
            if image["reserved"] != 0:
                LOGGER.log(
                    CustomLoggingLevel.OTHER_DATA,
                    "[0x%x] reserved data should be 0" %
                    self.fileObject.cur())
            mask >>= 2
            image["sortFlag"] = mask & 0b1
            mask >>= 1
            image["interlaceFlag"] = mask & 0b1
            mask >>= 1
            image["localColorTableFlag"] = mask & 0b1
            if image["localColorTableFlag"]:
                image["localColorTable"] = [
                    [0, 0, 0] for _ in range(2**(image["pixel"] + 1))
                ]
                for i in range(len(image["localColorTable"])):
                    for j in range(3):  # 0 red 1 green 2 blue
                        image["localColorTable"][i][
                            j] = file_object.read_uint8()
        elif tag == 0x21:
            if self.version != "89a":
                LOGGER.log(CustomLoggingLevel.OTHER_DATA,
                           "not version 89a but has extension segment.")
            sub_tag = file_object.read_uint8()
            if sub_tag == 0xF9:  # Graphic Control Extension.
                LOGGER.log(CustomLoggingLevel.IMAGE_DEBUG,
                           "Graphic Control Extension")
                block_size = file_object.read_uint8()
                if block_size != 4:
                    LOGGER.log(
                        CustomLoggingLevel.OTHER_DATA,
                        "block size is not 4 in Graphic Control Extension")
                control = {}
                mask = file_object.read_uint8()
                control["transparentFlag"] = mask & 0b1
                mask >>= 1
                control["userInputFlag"] = mask & 0b1
                mask >>= 1
                control["disposalMethod"] = mask & 0b111
                # 0 -   No disposal specified. The decoder is
                #       not required to take any action.
                # 1 -   Do not dispose. The graphic is to be left
                #       in place.
                # 2 -   Restore to background color. The area used by the
                #       graphic must be restored to the background color.
                # 3 -   Restore to previous. The decoder is required to
                #       restore the area overwritten by the graphic with
                #       what was there prior to rendering the graphic.
                # 4-7 - To be defined.
                control["delayTime"] = file_object.read_uint16()
                control["TransparentColonrIndex"] = file_object.read_uint8()
                terminator = file_object.read_uint8()
                if terminator != 0:
                    LOGGER.log(
                        CustomLoggingLevel.OTHER_DATA,
                        "[0x%x] terminator in block Graphic Control Extension is not 0"
                        % self.fileObject.cur())
                image["control"] = control
            elif sub_tag == 0xFE:  # Comment Extension.
                LOGGER.log(CustomLoggingLevel.IMAGE_DEBUG,
                           "Comment Extension.")
                if "comment" not in image:
                    image["comment"] = ""
                while True:
                    tmp = file_object.read(1)
                    # An empty read is taken as end of data (assumes
                    # read() behaves like file.read() there).
                    if not tmp or tmp == '\0':
                        break
                    image["comment"] += tmp
                LOGGER.log(CustomLoggingLevel.ASCII_DATA, image["comment"])
            elif sub_tag == 0x01:  # plain text Extension
                LOGGER.log(CustomLoggingLevel.IMAGE_DEBUG,
                           "plain text Extension")
                block_size = file_object.read_uint8()
                if block_size != 12:
                    LOGGER.warning("block size is not 12 in plain text")
                text = {
                    "gridLeftPosition": file_object.read_uint16(),
                    "gridTopPosition": file_object.read_uint16(),
                    "textGridWidth": file_object.read_uint16(),
                    "textGridHeight": file_object.read_uint16(),
                    "characterCellWidth": file_object.read_uint8(),
                    "characterCellHeight": file_object.read_uint8(),
                    "textForegroundColorIndex": file_object.read_uint8(),
                    "textBackgroundColorIndex": file_object.read_uint8(),
                    "data": ""
                }
                while True:
                    tmp = file_object.read(1)
                    # Same end-of-data guard as for comments.
                    if not tmp or tmp == '\0':
                        break
                    text["data"] += tmp
                if "text" in image:
                    LOGGER.log(CustomLoggingLevel.OTHER_DATA,
                               "text already in image")
                image["text"] = text
                LOGGER.log(CustomLoggingLevel.ASCII_DATA, image["text"])
            elif sub_tag == 0xFF:  # Application Extension.
                LOGGER.log(CustomLoggingLevel.IMAGE_DEBUG,
                           "Application Extension.")
                block_size = file_object.read_uint8()
                if block_size != 11:
                    LOGGER.log(
                        CustomLoggingLevel.OTHER_DATA,
                        "[0x%x] block size is not 11 in application extension"
                        % self.fileObject.cur())
                application = {
                    "identifier": file_object.read(8),
                    "authenticationCode": file_object.read(3)
                }
                # Only one data sub-block is read before the terminator.
                data_size = file_object.read_uint8()
                application["data"] = file_object.read(data_size)
                if "application" in image:
                    LOGGER.log(CustomLoggingLevel.OTHER_DATA,
                               "application Extension already in image")
                image["application"] = application
                terminator = file_object.read_uint8()
                if terminator != 0:
                    LOGGER.log(
                        CustomLoggingLevel.OTHER_DATA,
                        "terminator is not 0 in Application Extension")
            else:
                LOGGER.log(
                    CustomLoggingLevel.IMAGE_DEBUG,
                    "[0x%x] unknown extension at" % self.fileObject.cur())
        else:  # DATA
            # LOGGER.info("DATA")
            LOGGER.log(CustomLoggingLevel.IMAGE_DEBUG, "DATA")
            # The byte that introduced this branch is the code size.
            image["LZWMinimumCodeSize"] = tag
            image["data"] = []
            while True:
                data_size = file_object.read_uint8()
                if data_size == 0:
                    break
                data = file_object.read(data_size)
                # Extends the list with the individual items of the block.
                image["data"] += data
            self.images.append(image)
            image = {}
def tag_unknown(self, tag):
    """Log a marker that has no dedicated handler.

    Nothing is read from the file.  The function returns None, which the
    marker loop in ``start`` treats as the end of the walk.
    """
    position = self.fileObject.cur()
    markerHex = tag.encode('hex')
    LOGGER.log(CustomLoggingLevel.IMAGE_INFO,
               '[0x%x] Unknown tag 0x%s found.' % (position, markerHex))
def rowdata_ver1(self):
    """Report that version 1 BMP pixel data cannot be decoded.

    Before that, the number of bytes left over after the 10-byte header
    and ``self.rowDataLength`` is compared with a threshold derived from
    ``detectSensitive``; a larger surplus is logged as possible extra
    data at the end of the file.
    """
    surplus = self.fileObject.size - self.rowDataLength - 10
    threshold = 10 * (1 - detectSensitive)
    if surplus > threshold:
        LOGGER.log(CustomLoggingLevel.EXTRA_DATA,
                   'Some extra data may in end of the file.')
    LOGGER.error('BMP file version 1 is not surported.')