def parse(self):
    """Parse one Photoshop "8BIM" resource structure.

    Reads the four-byte signature, the two-byte resource ID (tag number),
    steps over the Pascal-string name, reads the four-byte payload length and
    stores a ``datablock.DataBlock`` for the payload in ``self.tags`` under
    the tag number. Afterwards the payload (and its pad byte, if any) is
    skipped so the read position is at the next structure.

    Nothing is stored when the signature is not "8BIM" or when the tag
    number cannot be read.
    """
    # The first four bytes of a data structure should always be the ASCII
    # string 8BIM
    if self.read(4) != "8BIM":
        return

    # The next two bytes specify the resource ID (tag number)
    tag_num_data = self.read(2)
    if not tag_num_data:
        return
    tag_num = byteform.btousi(tag_num_data, big_endian=self.big_endian)

    # What then follows is the "Pascal string". The first byte determines its
    # length. Length byte plus string are padded with a \x00 character to an
    # even total, so an even string length means one extra byte to skip. We
    # don't need this string, so we step over it.
    name_len = byteform.btousi(self.read(1), big_endian=self.big_endian)
    if name_len % 2 == 0:
        name_len += 1
    self.read(name_len)

    # The next four bytes determine the length of the data
    data_len = byteform.btousi(self.read(4), big_endian=self.big_endian)

    # Store the byte position and data length in the tags dict
    self.tags[tag_num] = datablock.DataBlock(self.fp, self.byte_pos, data_len)

    # Skip to the next structure
    self.read(data_len)
    if data_len % 2:
        # Resource data is padded to an even size as well; without stepping
        # over the pad byte the next structure would be read one byte early.
        try:
            self.read(1)
        except IOError:
            # NOTE(review): assumes read() signals "no more data" with
            # IOError -- confirm. A missing pad byte after the very last
            # resource is tolerated.
            pass
def parseHeader(self):
    """Parse the header of the Tiff file, starting at the byte offset.

    Seeks ``self.fp`` to ``self.offset`` and reads the two-byte byte-order
    mark: "MM" sets ``self.big_endian`` to True, "II" sets it to False. The
    following two bytes must decode to 42. The four bytes after that are
    decoded and stored in ``self.exif_offset``.

    Raises:
        ValueError: if the byte-order mark is neither "MM" nor "II", or if
            the magic number is not 42.
    """
    self.fp.seek(self.offset)

    # Read the header: the first two bytes select the byte order
    byte_order = self.fp.read(2)
    if byte_order == "MM":
        self.big_endian = True
    elif byte_order == "II":
        self.big_endian = False
    else:
        # If the file does not have a Tiff header, report it
        raise ValueError("File is not Tiff")

    # The next two bytes should be 42; anything else is not a Tiff header
    magic = byteform.btousi(self.fp.read(2), big_endian=self.big_endian)
    if magic != 42:
        raise ValueError("File is not Tiff")

    # Locate the Exif data
    self.exif_offset = byteform.btousi(self.fp.read(4),
                                       big_endian=self.big_endian)
def parseHeader(self):
    """Parse the header of the Tiff file, starting at the byte offset.

    Seeks ``self.fp`` to ``self.offset`` and reads the two-byte byte-order
    mark: "MM" sets ``self.big_endian`` to True, "II" sets it to False. The
    following two bytes must decode to 42. The four bytes after that are
    decoded and stored in ``self.exif_offset``.

    Raises:
        ValueError: if the byte-order mark is neither "MM" nor "II", or if
            the magic number is not 42.
    """
    self.fp.seek(self.offset)

    # Read the header: the first two bytes select the byte order
    order_mark = self.fp.read(2)
    if order_mark == "MM":
        self.big_endian = True
    elif order_mark == "II":
        self.big_endian = False
    else:
        # If the file does not have a Tiff header, report it
        raise ValueError("File is not Tiff")

    # The next two bytes should be 42; a different value means this is not a
    # Tiff header even though the byte-order mark matched.
    magic_number = byteform.btousi(self.fp.read(2), big_endian=self.big_endian)
    if magic_number != 42:
        raise ValueError("File is not Tiff")

    # Locate the Exif data
    self.exif_offset = byteform.btousi(self.fp.read(4),
                                       big_endian=self.big_endian)
def parse(self):
    """Parse one Photoshop "8BIM" resource structure.

    Reads the four-byte signature, the two-byte resource ID (tag number),
    steps over the Pascal-string name, reads the four-byte payload length and
    stores a ``datablock.DataBlock`` for the payload in ``self.tags`` under
    the tag number. Afterwards the payload (and its pad byte, if any) is
    skipped so the read position is at the next structure.

    Nothing is stored when the signature is not "8BIM" or when the tag
    number cannot be read.
    """
    # The first four bytes of a data structure should always be the ASCII
    # string 8BIM
    signature = self.read(4)
    if signature != "8BIM":
        return

    # The next two bytes specify the resource ID (tag number)
    tag_num_data = self.read(2)
    if not tag_num_data:
        return
    tag_num = byteform.btousi(tag_num_data, big_endian=self.big_endian)

    # What then follows is the "Pascal string". The first byte determines its
    # length. Length byte plus string are padded with a \x00 character to an
    # even total, so an even string length means one extra byte to skip. We
    # don't need this string, so we step over it.
    ps_len = byteform.btousi(self.read(1), big_endian=self.big_endian)
    if ps_len % 2 == 0:
        ps_len += 1
    self.read(ps_len)

    # The next four bytes determine the length of the data
    data_len = byteform.btousi(self.read(4), big_endian=self.big_endian)

    # Store the byte position and data length in the tags dict
    self.tags[tag_num] = datablock.DataBlock(self.fp, self.byte_pos, data_len)

    # Skip to the next structure
    self.read(data_len)
    if data_len % 2:
        # Resource data is padded to an even size as well; without stepping
        # over the pad byte the next structure would be read one byte early.
        try:
            self.read(1)
        except IOError:
            # NOTE(review): assumes read() signals "no more data" with
            # IOError -- confirm. A missing pad byte after the very last
            # resource is tolerated.
            pass
def parse(self):
    """Parse the IPTC block.

    The IPTC data is a linear stream of tags with their data. Every tag
    starts with the byte 0x1C, followed by one byte for the record number,
    one byte for the tag number and two bytes for the payload length (or,
    for an extended tag, the size of the length field that follows). For
    each tag a ``datablock.DataBlock`` describing the payload is added to
    the ``fields`` of the matching record, and ``self.parsed`` is set to
    True once the stream ends.
    """

    def read_marker():
        # Read the byte that should open the next tag; an IOError while
        # reading ends the stream.
        try:
            return self.read(1)
        except IOError:
            return None

    marker = read_marker()
    while marker == "\x1c":
        # Record number first, then the tag number
        record_num = byteform.btousi(self.read(1))
        tag_type = byteform.btousi(self.read(1))

        # Two bytes of payload length; when the most significant bit is set
        # the tag is extended and the remaining 15 bits give the number of
        # bytes that hold the real length.
        length = byteform.btousi(self.read(2), big_endian=self.big_endian)
        if length & 0x8000:
            length_count = length & 0x7FFF
            length = byteform.btousi(self.read(length_count),
                                     big_endian=self.big_endian)

        # Describe the payload and file it under its record and tag number
        payload = datablock.DataBlock(
            self.fp, self.tell() + self.getDataOffset(), length)
        record = self.records.query("num", record_num, "record")
        if tag_type not in record.fields:
            record.fields[tag_type] = [payload]
        else:
            record.fields[tag_type].append(payload)

        # Jump over the payload and look at the next tag marker
        self.seek(self.tell() + length)
        marker = read_marker()

    self.parsed = True
def parse(self):
    """Parse the IPTC block.

    The IPTC data is a linear stream of tags with their data. Every tag
    starts with the byte 0x1C, followed by one byte for the record number,
    one byte for the tag number and two bytes for the payload length (or,
    for an extended tag, the size of the length field that follows). For
    each tag a ``datablock.DataBlock`` describing the payload is added to
    the ``fields`` of the matching record, and ``self.parsed`` is set to
    True once the stream ends.
    """

    def next_start_byte():
        # Read the byte that should open the next tag; an IOError while
        # reading ends the stream.
        try:
            return self.read(1)
        except IOError:
            return None

    first_byte = next_start_byte()
    while first_byte == "\x1c":
        # Record number first, then the tag number
        record_num = byteform.btousi(self.read(1))
        tag_type = byteform.btousi(self.read(1))

        # Two bytes of payload length; when the most significant bit is set
        # the tag is extended and the remaining 15 bits give the number of
        # bytes that hold the real length.
        size = byteform.btousi(self.read(2), big_endian=self.big_endian)
        if size & 0x8000:
            size_field_len = size & 0x7FFF
            size = byteform.btousi(self.read(size_field_len),
                                   big_endian=self.big_endian)

        # Describe the payload and file it under its record and tag number
        block = datablock.DataBlock(
            self.fp, self.tell() + self.getDataOffset(), size)
        record = self.records.query("num", record_num, "record")
        if tag_type not in record.fields:
            record.fields[tag_type] = [block]
        else:
            record.fields[tag_type].append(block)

        # Jump over the payload and look at the next tag marker
        self.seek(self.tell() + size)
        first_byte = next_start_byte()

    self.parsed = True
def __parseHeader__(self):
    """Parse the first bytes of the segment header.

    Reads four bytes with ``self.read(4, 0)`` and decodes them.

    Returns:
        A list ``[number, length]``: the segment type number (second header
        byte) and the segment length (third and fourth byte) minus the two
        bytes of the length field itself.

    Raises:
        ValueError: if fewer than four bytes were read or the first byte is
            not 0xFF.
    """
    # Read the header
    header = self.read(4, 0)

    # The first byte of a JPEG segment header should be 0xFF. A missing or
    # truncated header cannot be a valid segment either.
    if header is None or len(header) < 4 or header[0] != "\xFF":
        raise ValueError("Not a JPEG segment!")

    # The next byte determines the type number of the segment
    number = byteform.btousi(header[1], big_endian=self.big_endian)

    # The next two bytes determine the length of the segment. We subtract two
    # because it includes these two bytes.
    length = byteform.btousi(header[2:4], big_endian=self.big_endian) - 2

    return [number, length]
def __parseHeader__(self):
    """Parse the first bytes of the segment header.

    Reads four bytes with ``self.read(4, 0)`` and decodes them.

    Returns:
        A list ``[number, length]``: the segment type number (second header
        byte) and the segment length (third and fourth byte) minus the two
        bytes of the length field itself.

    Raises:
        ValueError: if fewer than four bytes were read or the first byte is
            not 0xFF.
    """
    # Read the header
    header = self.read(4, 0)

    # A missing or truncated header cannot be a valid segment, and the first
    # byte of a JPEG segment header should be 0xFF.
    if header is None or len(header) < 4:
        raise ValueError("Not a JPEG segment!")
    if header[0] != "\xFF":
        raise ValueError("Not a JPEG segment!")

    # The second byte determines the type number of the segment
    number = byteform.btousi(header[1], big_endian=self.big_endian)

    # The last two bytes determine the length of the segment. We subtract two
    # because it includes these two bytes.
    length = byteform.btousi(header[2:4], big_endian=self.big_endian) - 2

    return [number, length]
def __init__(self, file_pointer=None, ifd_offset=0, header_offset=0,
             data=None, big_endian=False):
    """Set up the IFD of a Fujifilm makernote.

    Reads eight bytes at ``ifd_offset + header_offset``. If they spell
    "FUJIFILM", the next four bytes (little endian) give the makernote
    offset that is handed to ``ifd.IFD.__init__``; if nothing could be read
    (``None``), offset 0 is used.

    ``big_endian`` is accepted for signature compatibility only: Fujifilm
    always uses little endian, so ``False`` is passed on regardless.

    Raises:
        ValueError: if the header is present but is not "FUJIFILM".
    """
    base_offset = ifd_offset + header_offset
    block = datablock.DataBlock(fp=file_pointer, offset=base_offset,
                                data=data)
    header = block.read(8)

    if header is None:
        mn_offset = 0
    elif header == "FUJIFILM":
        # Fujifilm always uses little endian
        mn_offset = byteform.btousi(block.read(4), big_endian=False)
    else:
        raise ValueError("No valid Fujifilm Makernote!")

    ifd.IFD.__init__(self, file_pointer, mn_offset, base_offset, data,
                     big_endian=False)
def mapDiskFields(self):
    """Read the exif structure from disk and map all the fields.

    Resets ``self.fields`` and, when there is data (or the data size is
    unknown, i.e. ``getDataLength()`` returns None), reads the IFD at
    ``self.ifd_offset``: a two-byte field count followed by one entry per
    field. Each entry becomes a ``Tag`` stored in ``self.fields`` under its
    tag number. The four bytes after the last entry are stored in
    ``self.next_ifd_offset``.
    """
    self.fields = {}  # Empty the map

    # Parse when there's data, or when data size is unknown. None is tested
    # first because comparing None with 0 is an error on Python 3.
    data_length = self.getDataLength()
    if data_length is not None and data_length <= 0:
        return

    # Go to the proper offset and read the first two bytes. They represent
    # the number of fields in the IFD
    self.seek(self.ifd_offset)
    num_fields = byteform.btousi(self.read(2), big_endian=self.big_endian)

    for _ in range(num_fields):
        # Read the type of the tag (number), the way the payload is stored,
        # and the length of the payload
        tag_type = byteform.btousi(self.read(2), big_endian=self.big_endian)
        data_type = byteform.btousi(self.read(2), big_endian=self.big_endian)
        payload_len = byteform.btousi(self.read(4),
                                      big_endian=self.big_endian)

        # The word width (number of bytes to encode one "character") of the
        # payload is determined by the data type. This needs to be multiplied
        # by the number of characters to get the total number of bytes.
        num_bytes = payload_len * DATA_TYPES[data_type].word_width

        # The next four bytes either encode an offset to where the payload
        # can be found, or the payload itself if it fits in these four bytes.
        # We calculate the absolute offset in the file or data stream.
        if num_bytes < 5:
            payload_offset = self.tell() + self.header_offset
            self.read(4)
        else:
            payload_offset = (
                byteform.btousi(self.read(4), big_endian=self.big_endian)
                + self.header_offset)

        # Store the tag. This method does not check if we know the tag type,
        # and that's exactly what we want.
        self.fields[tag_type] = Tag(data_type, fp=self.fp, data=self.data,
                                    offset=payload_offset, length=num_bytes)

    self.next_ifd_offset = byteform.btoi(self.read(4),
                                         big_endian=self.big_endian)
def __init__(self, file_pointer=None, ifd_offset=0, header_offset=0,
             data=None, big_endian=False):
    """Set up the IFD of a Fujifilm makernote.

    Reads eight bytes at ``ifd_offset + header_offset``. If they spell
    "FUJIFILM", the next four bytes (little endian) give the makernote
    offset that is handed to ``ifd.IFD.__init__``; if nothing could be read
    (``None``), offset 0 is used.

    ``big_endian`` is accepted for signature compatibility only: Fujifilm
    always uses little endian, so ``False`` is passed on regardless.

    Raises:
        ValueError: if the header is present but is not "FUJIFILM".
    """
    start = ifd_offset + header_offset
    block = datablock.DataBlock(fp=file_pointer, offset=start, data=data)
    header = block.read(8)

    if header is None:
        mn_offset = 0
    elif header == "FUJIFILM":
        # Fujifilm always uses little endian
        mn_offset = byteform.btousi(block.read(4), big_endian=False)
    else:
        raise ValueError("No valid Fujifilm Makernote!")

    ifd.IFD.__init__(self, file_pointer, mn_offset, start, data,
                     big_endian=False)