def __parseIFDs(self, base, start, end, IFD=""):
    """Walk every entry of the IFD located at the current file position.

    The file is read as: a 2-byte entry count, then per entry a 2-byte tag,
    a 2-byte data format, a 4-byte component count and a 4-byte value/offset
    field, all decoded with ``self._orderAPP1``.

    Args:
        base: position that entry offsets are added to.
        start, end: exclusive bounds; an out-of-line value whose absolute
            position is not strictly between them is skipped.
        IFD: label used in the Enter/Leave log lines; must not be empty.

    Raises:
        AssertionError: empty ``IFD``, falsy ``self._file``, or a data format
            index that is 0 or beyond ``EXIF_TIFF_DATAFORMAT_LIST``.
    """
    assert IFD != ""
    log("Enter", "[%s]" % (IFD), "add")
    if not self._file:
        assert False

    fd = self._file
    order = self._orderAPP1
    entryCount = getBytes2(fd, order)
    log("Number of entries = %d" % (entryCount))

    for _ in xrange(entryCount):
        tag = getBytes2(fd, order)
        fmt = getBytes2(fd, order)
        compCount = getBytes4(fd, order)
        valueFieldPos = nowAt(fd)
        valueOffset = getBytes4(fd, order)
        nextEntryPos = nowAt(fd)

        if fmt == 0 or fmt >= len(EXIF_TIFF_DATAFORMAT_LIST):
            assert False, "dataformat incorrect = %d" % (fmt)
            # Only reached when asserts are disabled: skip the bad entry.
            continue

        byteCount = EXIF_TIFF_DATAFORMAT_LIST[fmt] * compCount
        if byteCount <= 4:
            # Small values are stored in the value/offset field itself.
            seekTo(fd, valueFieldPos)
        else:
            target = base + valueOffset
            if not (start < target < end):
                continue
            seekTo(fd, target)

        # The decoded entry is not used here; the call is kept for whatever
        # it does while decoding.
        self.__getDataFromFormat(tag, fmt, byteCount)
        seekTo(fd, nextEntryPos)

    log("Leave", "[%s]" % (IFD), "remove")
def __parseIFDs(self, base, start, end, IFD=""):
    """Walk every entry of the IFD located at the current file position.

    The file is read as: a 2-byte entry count, then per entry a 2-byte tag,
    a 2-byte data format, a 4-byte component count and a 4-byte value/offset
    field, all decoded with ``self._orderAPP1``.

    Args:
        base: position that entry offsets are added to.
        start, end: exclusive bounds; an out-of-line value whose absolute
            position is not strictly between them is skipped.
        IFD: label used in the Enter/Leave log lines; must not be empty.

    Raises:
        AssertionError: empty ``IFD``, falsy ``self._file``, or a data format
            index that is 0 or beyond ``EXIF_TIFF_DATAFORMAT_LIST``.
    """
    assert IFD != ""
    log("Enter", "[%s]"%(IFD), "add")
    if not self._file:
        assert False

    fd = self._file
    order = self._orderAPP1
    entryCount = getBytes2(fd, order)
    log("Number of entries = %d"%(entryCount))

    for _ in xrange(entryCount):
        tag = getBytes2(fd, order)
        fmt = getBytes2(fd, order)
        compCount = getBytes4(fd, order)
        valueFieldPos = nowAt(fd)
        valueOffset = getBytes4(fd, order)
        nextEntryPos = nowAt(fd)

        if fmt == 0 or fmt >= len(EXIF_TIFF_DATAFORMAT_LIST):
            assert False, "dataformat incorrect = %d"%(fmt)
            # Only reached when asserts are disabled: skip the bad entry.
            continue

        byteCount = EXIF_TIFF_DATAFORMAT_LIST[fmt] * compCount
        if byteCount <= 4:
            # Small values are stored in the value/offset field itself.
            seekTo(fd, valueFieldPos)
        else:
            target = base + valueOffset
            if not (start < target < end):
                continue
            seekTo(fd, target)

        # The decoded entry is not used here; the call is kept for whatever
        # it does while decoding.
        self.__getDataFromFormat(tag, fmt, byteCount)
        seekTo(fd, nextEntryPos)

    log("Leave", "[%s]"%(IFD), "remove")
def parse(self, filePath):
    """Scan the marker segments of a JPEG file and dispatch the known ones.

    The file is opened and kept on ``self._file`` (it is not closed here).
    After the SOI check, each iteration reads a 0xFF byte, a marker byte and
    a 2-byte segment length, handles APP1 (Exif / XMP) and APP2, and then
    seeks to ``curPos + length - 2``, i.e. the end of the segment, because
    the length field counts its own two bytes.  Scanning stops at EOI/SOS,
    at a byte that is not 0xFF, or at a zero marker.

    Args:
        filePath: path of the JPEG file to parse.

    Raises:
        AssertionError: the file does not start with 0xFF + JPEG_SOI.
    """
    # A JPEG is binary data: text mode may translate line endings (and stop
    # at Ctrl-Z on Windows), which would corrupt every offset used below.
    self._file = open(filePath, 'rb')
    seekTo(self._file, 0)

    first = getCharToOrd(self._file)
    marker = getCharToOrd(self._file)
    if (first != 0xff or marker != JPEG_SOI):
        assert False, "Not in JPEG format !!"

    while (marker):
        first = getCharToOrd(self._file)
        # Any value other than 0xFF (including a negative one) ends the scan.
        if first != 0xff:
            break
        marker = getCharToOrd(self._file)
        log("%s-%s" % (hex(first), hex(marker)))

        length = getBytes2(self._file)
        curPos = nowAt(self._file)
        log("length= %d, curPos=%d" % (length, curPos))

        if marker in [JPEG_EOI, JPEG_SOS]:
            log("EOI or SOS ... exit parsing")
            break
        elif marker == JPEG_APP0:
            log("Enter", "[APP0]", "add")
            log("Leave", "[APP0]", "remove")
            pass  # TBD
        elif marker == JPEG_APP1:
            log("Enter", "[APP1]", "add")
            header = getChar(self._file, 4)
            log("header = %s" % (header))
            if header.lower() == 'exif':
                # curPos + 6 is passed as the base for Exif offsets.
                self.__parseAPP1(curPos + 6, curPos, curPos + length - 2)
            elif header.lower() == 'http':
                # Re-read the segment from its start to test the XMP prefix.
                seekTo(self._file, curPos)
                xmpBuffer = getChar(self._file, length)
                checkURL = "http://ns.adobe.com/xap/1.0/"
                if xmpBuffer.startswith(checkURL):
                    headLen = len(checkURL)
                    self.__parseXMP(xmpBuffer[headLen:], length - headLen)
            log("Leave", "[APP1]", "remove")
        elif marker == JPEG_APP2:
            log("Enter", "[APP2]", "add")
            self.__parseAPP2(length)
            log("Leave", "[APP2]", "remove")
            pass  # TBD
        elif marker == JPEG_APP13:
            log("Enter", "[APP13]", "add")
            log("Leave", "[APP13]", "remove")
            pass  # TBD

        # Jump to the next marker regardless of how much a handler consumed.
        seekTo(self._file, curPos + length - 2)
def parse(self, filePath):
    """Scan the marker segments of a JPEG file and dispatch the known ones.

    The file is opened and kept on ``self._file`` (it is not closed here).
    After the SOI check, each iteration reads a 0xFF byte, a marker byte and
    a 2-byte segment length, handles APP1 (Exif / XMP) and APP2, and then
    seeks to ``curPos + length - 2``, i.e. the end of the segment, because
    the length field counts its own two bytes.  Scanning stops at EOI/SOS,
    at a byte that is not 0xFF, or at a zero marker.

    Args:
        filePath: path of the JPEG file to parse.

    Raises:
        AssertionError: the file does not start with 0xFF + JPEG_SOI.
    """
    # A JPEG is binary data: text mode may translate line endings (and stop
    # at Ctrl-Z on Windows), which would corrupt every offset used below.
    self._file = open(filePath, 'rb')
    seekTo(self._file, 0)

    first = getCharToOrd(self._file)
    marker = getCharToOrd(self._file)
    if (first != 0xff or marker != JPEG_SOI):
        assert False, "Not in JPEG format !!"

    while (marker):
        first = getCharToOrd(self._file)
        # Any value other than 0xFF (including a negative one) ends the scan.
        if first != 0xff:
            break
        marker = getCharToOrd(self._file)
        log("%s-%s"%(hex(first), hex(marker)))

        length = getBytes2(self._file)
        curPos = nowAt(self._file)
        log("length= %d, curPos=%d"%(length,curPos))

        if marker in [JPEG_EOI, JPEG_SOS]:
            log("EOI or SOS ... exit parsing")
            break
        elif marker == JPEG_APP0:
            log("Enter", "[APP0]", "add")
            log("Leave", "[APP0]", "remove")
            pass  # TBD
        elif marker == JPEG_APP1:
            log("Enter", "[APP1]", "add")
            header = getChar(self._file, 4)
            log("header = %s"%(header))
            if header.lower() == 'exif':
                # curPos + 6 is passed as the base for Exif offsets.
                self.__parseAPP1(curPos+6, curPos, curPos+length-2)
            elif header.lower() == 'http':
                # Re-read the segment from its start to test the XMP prefix.
                seekTo(self._file, curPos)
                xmpBuffer = getChar(self._file, length)
                checkURL = "http://ns.adobe.com/xap/1.0/"
                if xmpBuffer.startswith(checkURL):
                    headLen = len(checkURL)
                    self.__parseXMP(xmpBuffer[headLen:], length-headLen)
            log("Leave", "[APP1]", "remove")
        elif marker == JPEG_APP2:
            log("Enter", "[APP2]", "add")
            self.__parseAPP2(length)
            log("Leave", "[APP2]", "remove")
            pass  # TBD
        elif marker == JPEG_APP13:
            log("Enter", "[APP13]", "add")
            log("Leave", "[APP13]", "remove")
            pass  # TBD

        # Jump to the next marker regardless of how much a handler consumed.
        seekTo(self._file, curPos+length-2)
def __parseAPP2(self, length):
    """Parse an APP2 segment that carries an embedded ICC profile.

    Expects the file to be positioned just after the 2-byte segment length.
    The payload must start with ``"ICC_PROFILE"`` plus one separator byte,
    followed by a chunk sequence number and a chunk count.  Only a segment
    whose sequence number is 1 is parsed; for a multi-chunk profile that
    means only the first chunk (header) is handed to ``ICCProfileParser``.

    Args:
        length: segment length as stored in the file, i.e. including the two
            bytes of the length field itself.

    Raises:
        AssertionError: the payload does not start with ``"ICC_PROFILE"``.
    """
    curPos = nowAt(self._file)
    # The length field counts its own two bytes, so the payload that follows
    # is length - 2 bytes long; reading `length` would run into the next
    # segment.
    payload = getChar(self._file, max(length - 2, 0))
    iccIdentifier = "ICC_PROFILE"
    if (payload.startswith(iccIdentifier)):
        # Skip the identifier and its separator byte; the next two bytes are
        # the chunk sequence number and the total chunk count.
        chunkInfo = payload[len(iccIdentifier) + 1:len(iccIdentifier) + 3]
        if len(chunkInfo) < 2 or ord(chunkInfo[0]) != 0x01:
            # Truncated header, or not the first chunk of the profile.
            log("Wrong ICC Profile format !")
            return
        # 14 = identifier (11) + separator (1) + sequence (1) + count (1).
        seekTo(self._file, curPos + 14)
        from ICCProfileParser import ICCProfileParser
        iccParser = ICCProfileParser(self._file)
        iccParser.parse()
    else:
        log("Wrong ICC Profile format !")
        assert False
def __parseAPP2(self, length):
    """Parse an APP2 segment that carries an embedded ICC profile.

    Expects the file to be positioned just after the 2-byte segment length.
    The payload must start with ``"ICC_PROFILE"`` plus one separator byte,
    followed by a chunk sequence number and a chunk count.  Only a segment
    whose sequence number is 1 is parsed; for a multi-chunk profile that
    means only the first chunk (header) is handed to ``ICCProfileParser``.

    Args:
        length: segment length as stored in the file, i.e. including the two
            bytes of the length field itself.

    Raises:
        AssertionError: the payload does not start with ``"ICC_PROFILE"``.
    """
    curPos = nowAt(self._file)
    # The length field counts its own two bytes, so the payload that follows
    # is length - 2 bytes long; reading `length` would run into the next
    # segment.
    payload = getChar(self._file, max(length - 2, 0))
    iccIdentifier = "ICC_PROFILE"
    if (payload.startswith(iccIdentifier)):
        # Skip the identifier and its separator byte; the next two bytes are
        # the chunk sequence number and the total chunk count.
        chunkInfo = payload[len(iccIdentifier)+1:len(iccIdentifier)+3]
        if len(chunkInfo) < 2 or ord(chunkInfo[0]) != 0x01:
            # Truncated header, or not the first chunk of the profile.
            log("Wrong ICC Profile format !")
            return
        # 14 = identifier (11) + separator (1) + sequence (1) + count (1).
        seekTo(self._file, curPos+14)
        from ICCProfileParser import ICCProfileParser
        iccParser = ICCProfileParser(self._file)
        iccParser.parse()
    else:
        log("Wrong ICC Profile format !")
        assert False
def parseTagTable():
    """Read the ICC tag table and build one descriptor object per tag.

    Uses ``self`` and ``basePos`` from the enclosing scope.  The table is a
    4-byte tag count followed by 12-byte records (4-char signature, 4-byte
    offset, 4-byte size).  For each record the 4-char type descriptor found
    at ``basePos + offset`` is read and passed to ``GetSigObject``; the
    result is stored in ``self.__dicSig2TagInfo`` under the signature.

    Raises:
        AssertionError: the same signature appears twice in the table.
    """
    def readFourChars():
        # Four single-character reads joined into one string.
        return ''.join(getChar(self._fd) for _ in xrange(4))

    log("Enter", "[ICCProfileTagTable]", "add")
    numTags = getBytes4(self._fd)
    log("Tag count = %d"%(numTags))

    for _ in xrange(numTags):
        recordPos = nowAt(self._fd)
        sig = readFourChars()
        offset = getBytes4(self._fd)
        size = getBytes4(self._fd)

        dataPos = basePos+offset
        seekTo(self._fd, dataPos)
        log("Tag sig(%s) / offset(%d) / size(%d) / basePos(%d) / tagSigPos(%d) / tagTypePos(%d) "%(sig, offset, size, basePos, recordPos, dataPos))

        typeDesc = readFourChars()
        log("Type Desc(%s)"%(typeDesc))
        tagObj = GetSigObject(sig, typeDesc, self._fd, size, dataPos)

        assert sig not in self.__dicSig2TagInfo, "Check this file, two same sig !"
        self.__dicSig2TagInfo[sig] = tagObj

        # Return to the record that follows the one just handled.
        seekTo(self._fd, recordPos+12)

    log("Leave", "[ICCProfileTagTable]", "remove")
    pprint.pprint(self.__dicSig2TagInfo)
def GetMlucHelper(_fd, sig, tagStartPos):
    """Read an ICC multiLocalizedUnicode ('mluc') tag body.

    Expects ``_fd`` to be positioned right after the 4-char type signature.
    Reads the reserved word, the record count and the record size, then for
    each record the 2-char language code, the 2-char country code, the
    string length and the string offset (relative to ``tagStartPos``).

    Args:
        _fd: file object the tag is read from.
        sig: tag signature handed to ``MultiLocalizedUnicode``.
        tagStartPos: position of the start of the tag data; record string
            offsets are added to it.

    Returns:
        The ``MultiLocalizedUnicode`` object holding one entry per record.

    Raises:
        AssertionError: the reserved word is not zero.
    """
    reserved = getBytes4(_fd)
    assert reserved == 0
    numOfRecords = getBytes4(_fd)
    recordSize = getBytes4(_fd)
    log(" numOfRecords = %d / recordSize = %s"%(numOfRecords, recordSize))
    sigDescObj = MultiLocalizedUnicode(sig)
    for _ in xrange(numOfRecords):
        langCode = ''.join(getChar(_fd) for i in xrange(2))
        langCountryCode = ''.join(getChar(_fd) for i in xrange(2))
        lenRecordString = getBytes4(_fd)
        offsetRecordString = getBytes4(_fd)

        # The string lives elsewhere in the tag; remember where the record
        # table continues so the next record can be read afterwards.
        here = nowAt(_fd)
        seekTo(_fd, tagStartPos + offsetRecordString)
        uniBytes = getChar(_fd, lenRecordString)
        # mluc strings are stored as big-endian UTF-16 (ICC.1
        # multiLocalizedUnicodeType); decoding them with the default ASCII
        # codec leaves a NUL before every character and destroys any
        # non-ASCII text.
        uniChar = uniBytes.decode('utf-16-be', 'replace')
        log(" uniChar = %s"%(uniChar))
        sigDescObj.add(langCode, langCountryCode, uniChar)
        seekTo(_fd, here)
    return sigDescObj
def __parseBasicIFD(self, base, start, end):
    """Walk the primary IFD at the current file position.

    Entries are read exactly as in ``__parseIFDs`` (2-byte count, then per
    entry tag / format / component count / 4-byte value-or-offset, decoded
    with ``self._orderAPP1``).  Entries tagged ExifIFD or GPSIFD hold an
    offset to a nested IFD, which is parsed with ``__parseIFDs``; a few other
    known tags are only logged.

    Args:
        base: position that entry offsets are added to.
        start, end: exclusive bounds for out-of-line values.

    Raises:
        AssertionError: falsy ``self._file`` or an invalid data format index.
    """
    log("Enter", "[BasicIFD]", "add")
    if not self._file:
        assert False

    # (tag id, label, value-is-offset-of-nested-IFD); checked in this order
    # and the first match wins.
    knownTags = (
        (TAGID_ExifIFD, "ExifIFD", True),
        (TAGID_SubIFDs, "SubIFDs", False),
        (TAGID_GPSIFD, "GPSIFD", True),
        (TAGID_IPTC, "IPTC", False),
        (TAGID_XMP, "XMP", False),
        (TAGID_Photoshop, "Photoshop", False),
        (TAGID_ICCProfile, "ICCProfile", False),
        (TAGID_DNGPrivateData, "DNGPrivateData", False),
    )

    entryCount = getBytes2(self._file, self._orderAPP1)
    log("Number of entries = %d" % (entryCount))

    for _ in xrange(entryCount):
        tag = getBytes2(self._file, self._orderAPP1)
        fmt = getBytes2(self._file, self._orderAPP1)
        compCount = getBytes4(self._file, self._orderAPP1)
        valueFieldPos = nowAt(self._file)
        valueOffset = getBytes4(self._file, self._orderAPP1)
        nextEntryPos = nowAt(self._file)

        if fmt == 0 or fmt >= len(EXIF_TIFF_DATAFORMAT_LIST):
            assert False, "dataformat incorrect = %d" % (fmt)
            # Only reached when asserts are disabled: skip the bad entry.
            continue

        byteCount = EXIF_TIFF_DATAFORMAT_LIST[fmt] * compCount
        if byteCount <= 4:
            # Small values are stored in the value/offset field itself.
            seekTo(self._file, valueFieldPos)
        else:
            target = base + valueOffset
            if not (start < target < end):
                continue
            seekTo(self._file, target)

        entry = self.__getDataFromFormat(tag, fmt, byteCount)
        for tagId, label, isNestedIFD in knownTags:
            if entry.getTag() == tagId:
                if isNestedIFD:
                    ifdOffset = entry.getValue()
                    seekTo(self._file, base + ifdOffset)
                    self.__parseIFDs(base, start, end, label)
                else:
                    log(label)
                break

        # Nested parsing moves the file position; go back to the next entry.
        seekTo(self._file, nextEntryPos)

    log("Leave", "[BasicIFD]", "remove")
def __parseBasicIFD(self, base, start, end):
    """Walk the primary IFD at the current file position.

    Entries are read exactly as in ``__parseIFDs`` (2-byte count, then per
    entry tag / format / component count / 4-byte value-or-offset, decoded
    with ``self._orderAPP1``).  Entries tagged ExifIFD or GPSIFD hold an
    offset to a nested IFD, which is parsed with ``__parseIFDs``; a few other
    known tags are only logged.

    Args:
        base: position that entry offsets are added to.
        start, end: exclusive bounds for out-of-line values.

    Raises:
        AssertionError: falsy ``self._file`` or an invalid data format index.
    """
    log("Enter", "[BasicIFD]", "add")
    if not self._file:
        assert False

    # (tag id, label, value-is-offset-of-nested-IFD); checked in this order
    # and the first match wins.
    knownTags = (
        (TAGID_ExifIFD, "ExifIFD", True),
        (TAGID_SubIFDs, "SubIFDs", False),
        (TAGID_GPSIFD, "GPSIFD", True),
        (TAGID_IPTC, "IPTC", False),
        (TAGID_XMP, "XMP", False),
        (TAGID_Photoshop, "Photoshop", False),
        (TAGID_ICCProfile, "ICCProfile", False),
        (TAGID_DNGPrivateData, "DNGPrivateData", False),
    )

    entryCount = getBytes2(self._file, self._orderAPP1)
    log("Number of entries = %d"%(entryCount))

    for _ in xrange(entryCount):
        tag = getBytes2(self._file, self._orderAPP1)
        fmt = getBytes2(self._file, self._orderAPP1)
        compCount = getBytes4(self._file, self._orderAPP1)
        valueFieldPos = nowAt(self._file)
        valueOffset = getBytes4(self._file, self._orderAPP1)
        nextEntryPos = nowAt(self._file)

        if fmt == 0 or fmt >= len(EXIF_TIFF_DATAFORMAT_LIST):
            assert False, "dataformat incorrect = %d"%(fmt)
            # Only reached when asserts are disabled: skip the bad entry.
            continue

        byteCount = EXIF_TIFF_DATAFORMAT_LIST[fmt] * compCount
        if byteCount <= 4:
            # Small values are stored in the value/offset field itself.
            seekTo(self._file, valueFieldPos)
        else:
            target = base + valueOffset
            if not (start < target < end):
                continue
            seekTo(self._file, target)

        entry = self.__getDataFromFormat(tag, fmt, byteCount)
        for tagId, label, isNestedIFD in knownTags:
            if entry.getTag() == tagId:
                if isNestedIFD:
                    ifdOffset = entry.getValue()
                    seekTo(self._file, base+ifdOffset)
                    self.__parseIFDs(base, start, end, label)
                else:
                    log(label)
                break

        # Nested parsing moves the file position; go back to the next entry.
        seekTo(self._file, nextEntryPos)

    log("Leave", "[BasicIFD]", "remove")
def __parseICCProfile(self):
    """Parse the ICC profile that starts at the current position of self._fd.

    First the 128-byte header is decoded into ``self.__dicHeaderInfo``, then
    the tag table is walked and one descriptor object per tag is stored in
    ``self.__dicSig2TagInfo``.  Tag offsets are relative to the position the
    file had on entry.

    Multi-byte integers are read with the helpers' default byte order, which
    is assumed to be big-endian, as the ICC format requires.

    Raises:
        AssertionError: the 'acsp' signature is missing, or a tag signature
            occurs twice.
        ZeroDivisionError: the illuminant Y value in the header is zero.
    """
    # Refer to http://blog.fpmurphy.com/2012/03/extract-icc-profile-from-images.html
    basePos = nowAt(self._fd)

    def readSig():
        # Four single-character reads joined into one string.
        return ''.join(getChar(self._fd) for _ in xrange(4))

    def getBitsList(numBytes):
        # Expand numBytes bytes into a flat list of bits, most-significant
        # bit of the first byte first; the last item is bit 0 of the field.
        lstBits = []
        for _ in xrange(numBytes):
            bits_short = bin(getCharToOrd(self._fd))[2:]
            bits_full = '00000000'[len(bits_short):] + bits_short
            lstBits.extend([int(b) for b in bits_full])
        return lstBits

    def readS15Fixed16():
        # s15Fixed16Number: signed 32-bit value with 16 fractional bits.
        raw = getBytes4(self._fd)
        if raw >= 0x80000000:
            raw -= 0x100000000
        return raw / 65536.0

    def parseHeader():
        log("Enter", "[ICCProfileHeader]", "add")
        profileSize = getBytes4(self._fd)
        cmmType = readSig()
        lstVer = [getCharToOrd(self._fd) for _ in xrange(4)]
        self.__dicHeaderInfo[H_CMM_TYPE] = cmmType
        # Byte 0 is the major version; byte 1 packs minor (high nibble) and
        # bug-fix (low nibble).  Printing byte 1 as a decimal turned 4.2.0
        # into "4.32".
        self.__dicHeaderInfo[H_VERSION] = "%d.%d.%d"%(lstVer[0], lstVer[1] >> 4, lstVer[1] & 0x0f)

        deviceClass = readSig()
        colorSpaceOfData = readSig()
        pcs = readSig()
        self.__dicHeaderInfo[H_DEVICE_CLASS] = dicDevCls2Name.get(deviceClass, "Not found")
        self.__dicHeaderInfo[H_COLOR_SPACE] = colorSpaceOfData.strip()
        self.__dicHeaderInfo[H_PROFILE_CONNECTION_SPACE] = pcs.strip()

        lstDatetime = [getBytes2(self._fd) for _ in xrange(6)]
        signature = readSig()
        assert signature == "acsp", "Not a standard ICC Profile !!"
        primaryPlatform = readSig()

        lstProfileFlags = getBitsList(4)
        self.__dicHeaderInfo[H_CREATE_DATETIME] = "Datatime = %d/%d/%d-%d:%d:%d"%tuple(lstDatetime)
        self.__dicHeaderInfo[H_SIGNATURE] = signature
        self.__dicHeaderInfo[H_PLATFORM] = dicPlatformSig2Desc.get(primaryPlatform, "Not found")
        # ICC numbers flag bits from the least-significant end: bit 0 is
        # "embedded", bit 1 is "cannot be used independently".  They are the
        # last two items of the MSB-first bit list, not the first two.
        self.__dicHeaderInfo[H_IS_EMBEDED] = lstProfileFlags[-1] == 1
        self.__dicHeaderInfo[H_USED_INDENDENTLY] = lstProfileFlags[-2] != 1

        deviceManufacturer = readSig()
        deviceModel = readSig()
        lstDeviceAttributes = getBitsList(8)
        # The rendering intent is a 32-bit field whose upper 16 bits are zero
        # and whose lower 16 bits carry the intent, so the second half-word
        # read is the one that matters.
        zeroPadding, renderingIntent = getBytes2(self._fd), getBytes2(self._fd)
        self.__dicHeaderInfo[H_DEVICE_MANUFACTURER] = deviceManufacturer
        self.__dicHeaderInfo[H_DEVICE_MODEL] = deviceModel
        # Same least-significant-first numbering as the profile flags.
        self.__dicHeaderInfo[H_ATTR_T_R] = "Transparency" if lstDeviceAttributes[-1] == 1 else "Reflective"
        self.__dicHeaderInfo[H_ATTR_M_G] = "Matte" if lstDeviceAttributes[-2] == 1 else "Glossy"
        self.__dicHeaderInfo[H_RENDERING_INTENT] = dicRenderingIntent2Desc.get(renderingIntent, "Not found")

        # The illuminant is three s15Fixed16 numbers.  Convert them
        # arithmetically instead of reinterpreting the integer bits as an
        # IEEE float, which only gave usable ratios for small positive
        # values and raised struct.error for values >= 2**31.
        X = readS15Fixed16()
        Y = readS15Fixed16()
        Z = readS15Fixed16()
        # Stored normalised to Y, as before.
        CIEXYZ_X = X / Y
        CIEXYZ_Y = Y / Y
        CIEXYZ_Z = Z / Y

        profileCreator = readSig()
        # Profile ID (16 bytes) and reserved area (28 bytes) are not used,
        # but must be consumed so the tag table starts at the right place.
        for _ in xrange(16 + 28):
            getCharToOrd(self._fd)
        self.__dicHeaderInfo[H_PROFILE_CREATOR] = profileCreator
        self.__dicHeaderInfo[H_PROFILE_D50_XYZ] = "(%f, %f, %f)"%(CIEXYZ_X, CIEXYZ_Y, CIEXYZ_Z)
        log("Header Information : \n%s "%(pprint.pformat(self.__dicHeaderInfo, indent=2)))
        log("Leave", "[ICCProfileHeader]", "remove")

    def parseTagTable():
        # Tag table: a 4-byte count, then 12-byte records made of a 4-char
        # signature, a 4-byte offset (relative to basePos) and a 4-byte size.
        log("Enter", "[ICCProfileTagTable]", "add")
        tagCount = getBytes4(self._fd)
        log("Tag count = %d"%(tagCount))
        for idx in xrange(tagCount):
            tagStartPos = nowAt(self._fd)
            sig = readSig()
            offset = getBytes4(self._fd)
            size = getBytes4(self._fd)
            seekTo(self._fd, basePos+offset)
            log("Tag sig(%s) / offset(%d) / size(%d) / basePos(%d) / tagSigPos(%d) / tagTypePos(%d) "%(sig, offset, size, basePos, tagStartPos, basePos+offset))
            typeDesc = readSig()
            log("Type Desc(%s)"%(typeDesc))
            sigDescObj = GetSigObject(sig, typeDesc, self._fd, size, basePos+offset)
            assert sig not in self.__dicSig2TagInfo, "Check this file, two same sig !"
            self.__dicSig2TagInfo[sig] = sigDescObj
            # Back to the record that follows the one just handled.
            seekTo(self._fd, tagStartPos+12)
        log("Leave", "[ICCProfileTagTable]", "remove")
        pprint.pprint(self.__dicSig2TagInfo)

    parseHeader()
    parseTagTable()
def GetAToBHelper(_fd, sig, tagStartPos, reverse=False):
    """Read an ICC lutAToBType / lutBToAType tag body.

    Expects ``_fd`` to be positioned right after the 4-char type signature.
    The body holds a reserved word, the input and output channel counts, two
    padding bytes and five 4-byte offsets (B curves, matrix, M curves, CLUT,
    A curves).  Offsets are relative to ``tagStartPos``; an offset of 0 means
    the element is absent.

    Args:
        _fd: file object the tag is read from.
        sig: tag signature handed to every object that is built.
        tagStartPos: position of the start of the tag data.
        reverse: False builds a ``LutAToB``; True builds a ``LutBToA``.

    Returns:
        The ``LutAToB`` or ``LutBToA`` object.

    Raises:
        AssertionError: a reserved or padding field is non-zero, a grid-point
            slot beyond the input channel count is non-zero, or a present
            curve set does not yield the expected number of curves.
    """
    reserved = getBytes4(_fd)
    assert reserved == 0
    numOfInputChannel = getCharToOrd(_fd)
    numOfOutputChannel = getCharToOrd(_fd)
    padding = getBytes2(_fd)
    log(" Input(%d) , Output(%d), padding(%d)"%(numOfInputChannel, numOfOutputChannel, padding))
    assert padding == 0

    # Curve counts depend on which side of the CLUT a set sits on:
    #   AToB: A curves -> CLUT -> M curves -> matrix -> B curves
    #   BToA: B curves -> matrix -> M curves -> CLUT -> A curves
    # so A curves are per input channel for AToB, and B/M curves are per
    # input channel for BToA.  Using the output count everywhere is only
    # right when both counts are equal.
    if reverse:
        numOfBCurves = numOfMCurves = numOfInputChannel
        numOfACurves = numOfOutputChannel
    else:
        numOfBCurves = numOfMCurves = numOfOutputChannel
        numOfACurves = numOfInputChannel

    def readCurveSet(label, offset, count):
        # Read `count` curves stored at tagStartPos + offset, then restore
        # the file position; an offset of 0 yields an empty list.
        lstCurve = []
        if offset == 0:
            return lstCurve
        here = nowAt(_fd)
        seekTo(_fd, tagStartPos + offset)
        for _ in xrange(count):
            subType = getChar(_fd, 4)
            log(" %s Curve subtype = %s"%(label, subType))
            if subType == "para":
                lstCurve.append(GetParaCurveHelper(_fd, sig))
            elif subType == "curv":
                lstCurve.append(GetCurveHelper(_fd, sig))
        seekTo(_fd, here)
        assert len(lstCurve) == count
        return lstCurve

    sigDescObj = None
    mMat = None
    clut = []

    offset2BCurve = getBytes4(_fd)
    lstBCurve = readCurveSet("B", offset2BCurve, numOfBCurves)

    offset2Matrix = getBytes4(_fd)
    if offset2Matrix != 0:
        here = nowAt(_fd)
        seekTo(_fd, tagStartPos + offset2Matrix)
        mat = []
        for _ in xrange(12):
            # s15Fixed16: signed 16-bit integer part + 16-bit fraction.
            intUnsigned = getBytes2(_fd)
            intSigned = intUnsigned - 65536 if intUnsigned >= 32768 else intUnsigned
            fracPart = getBytes2(_fd)
            mat.append(intSigned + float(fracPart) / 65536)
        log(" Matrix = %s"%(str(mat)))
        mMat = S15Fixed16Array(sig, mat)
        seekTo(_fd, here)

    offset2MCurve = getBytes4(_fd)
    lstMCurve = readCurveSet("M", offset2MCurve, numOfMCurves)

    offset2CLUT = getBytes4(_fd)
    if offset2CLUT != 0:
        # TODO : Check the implementation correctness
        here = nowAt(_fd)
        seekTo(_fd, tagStartPos + offset2CLUT)
        lstGridPoints = []
        for chan in xrange(16):
            gridPts = getCharToOrd(_fd)
            if chan >= numOfInputChannel:
                # Slots beyond the input channel count must be unused.
                assert gridPts == 0
            lstGridPoints.append(gridPts)
        precision = getCharToOrd(_fd)
        padding = getBytes3(_fd)
        log(" >>> lstGridPoints : %s"%(str(lstGridPoints)))
        log(" >>> precision : %s / padding %s "%(str(precision), str(padding)))
        assert padding == 0
        getDataPoint = getBytes2 if precision == 2 else getCharToOrd

        def fn(x, y):
            # Multiply the grid sizes, ignoring the zero (unused) slots.
            return x * y if y != 0 else x

        totalCLUTPts = reduce(fn, lstGridPoints)
        for _ in xrange(totalCLUTPts):
            clut.append([getDataPoint(_fd) for __ in xrange(numOfOutputChannel)])
        seekTo(_fd, here)

    offset2ACurve = getBytes4(_fd)
    lstACurve = readCurveSet("A", offset2ACurve, numOfACurves)

    log(" O2B(%d) / O2mat(%d) / O2M(%d) / O2CLUT(%d) / O2A(%d)"%(offset2BCurve,
        offset2Matrix, offset2MCurve, offset2CLUT, offset2ACurve))

    if reverse:
        sigDescObj = LutBToA(sig, lstBCurve, mMat, lstMCurve, clut, lstACurve)
    else:
        sigDescObj = LutAToB(sig, lstBCurve, mMat, lstMCurve, clut, lstACurve)
    return sigDescObj