def analyze_aiff_chunk(self, chunk_name_bytes, chunk_size, aiff_file, display, is_aifc):
    """Dispatch one AIFF chunk to its analyzer, or skip it if it is unknown.

    The file position must be directly behind the 8-byte chunk header
    (name + size) when this method is called. On return the position is at
    the start of the next chunk header.

    Args:
        chunk_name_bytes: The 4-byte chunk identifier as read from the file.
        chunk_size: The chunk size taken from the chunk header.
        aiff_file: Seekable binary file object positioned at the chunk data.
        display: Flag forwarded to the print helpers (presumably toggles
            console output - confirm against print_with_condition).
        is_aifc: Forwarded to the COMM chunk analyzer.

    Returns:
        The result of the chunk analyzer (True if it found an error), or
        False if the chunk was skipped.
    """
    if chunk_name_bytes == b'COMM':
        found_error = self.analyze_comm_chunk(chunk_size, aiff_file, display, is_aifc)
    elif chunk_name_bytes == b'SSND':
        found_error = self.analyze_ssnd_chunk(chunk_size, aiff_file, display)
    else:
        print_with_condition(
            display, "Skipping {} chunk (size: {}).".format(
                self.decode_bytes(chunk_name_bytes), chunk_size))
        # skip chunk and set file position to next chunk
        aiff_file.seek(chunk_size, 1)
        found_error = False
    if chunk_size % 2:
        # IFF chunks are aligned to even offsets: a chunk with an odd size is
        # followed by one pad byte that is not counted in the chunk size.
        # Without skipping it, the next chunk header is read one byte off.
        aiff_file.seek(1, 1)
    return found_error
def analyze_wave_chunk(self, chunk_name_bytes, chunk_size, wave_file, num_bytes, display):
    """Dispatch one RIFF/WAVE chunk to its analyzer, or skip it if unknown.

    The file position must be directly behind the 8-byte chunk header
    (name + size) when this method is called.

    Args:
        chunk_name_bytes: The 4-byte chunk identifier as read from the file.
        chunk_size: The chunk size taken from the chunk header.
        wave_file: Seekable binary file object positioned at the chunk data.
        num_bytes: Total size of the file in bytes; forwarded to the data
            chunk analyzer.
        display: Flag forwarded to the print helpers (presumably toggles
            console output - confirm against print_with_condition).

    Returns:
        The result of the chunk analyzer (True if it found an error), or
        False if the chunk was skipped.
    """
    if chunk_name_bytes == b'fmt ':
        found_error = self.analyze_fmt_chunk(chunk_size, wave_file, display)
    elif chunk_name_bytes == b'data':
        found_error = self.analyze_data_chunk(chunk_size, wave_file, num_bytes, display)
    else:
        print_with_condition(
            display, "Skipping {} chunk (size: {}).".format(
                self.decode_bytes(chunk_name_bytes), chunk_size))
        # skip chunk and set file position to next chunk
        wave_file.seek(chunk_size, 1)
        found_error = False
    if chunk_size % 2:
        # RIFF chunks are word-aligned: a chunk with an odd size is followed
        # by one pad byte that is not counted in the chunk size. Without
        # skipping it, the next chunk header is read one byte off.
        wave_file.seek(1, 1)
    return found_error
def analyze_data_chunk(self, chunk_size, wave_file, num_bytes, display):
    """Check the size of the WAVE data chunk and skip over its audio data.

    The file position must be directly behind the 8-byte chunk header of the
    data chunk, i.e. at the first byte of audio data.

    Args:
        chunk_size: Size of the data chunk as stated in its chunk header.
        wave_file: Seekable binary file object positioned at the audio data.
        num_bytes: Total size of the file in bytes.
        display: Flag forwarded to the print helpers (presumably toggles
            console output - confirm against print_with_condition).

    Returns:
        Always False; a size mismatch is only reported as a warning.
    """
    print_with_condition(
        display, "Reading data chunk (size: {}).".format(chunk_size))
    # The chunk header has already been consumed by the caller, so everything
    # between the current position and the end of the file is expected to be
    # audio data. Deriving this from the position (instead of a static offset
    # of 44) keeps it correct when other chunks, e.g. JUNK, precede the data.
    expected_data_subchunk_size = num_bytes - wave_file.tell()
    if chunk_size != expected_data_subchunk_size:
        warning_with_condition(
            display,
            "Data subchunk size does not match file size. Should be {}, but is: {} (difference: {})"
            .format(expected_data_subchunk_size, chunk_size,
                    abs(expected_data_subchunk_size - chunk_size)))
    # Skip the audio data. Nothing of the chunk body has been read here, so
    # the full chunk size has to be skipped.
    wave_file.seek(chunk_size, 1)
    return False
def analyze_ssnd_chunk(self, chunk_size, aiff_file, display):
    """Report the SSND chunk's offset and block size, then skip the sound data.

    Args:
        chunk_size: Size of the SSND chunk as stated in its chunk header.
        aiff_file: Seekable binary file object positioned at the chunk data.
        display: Flag forwarded to print_with_condition.

    Returns:
        Always False.
    """
    print_with_condition(
        display, "Reading SSND chunk (size: {}).".format(chunk_size))

    # The chunk body starts with two big-endian unsigned 32-bit fields; each
    # one is read, decoded and reported before the next one is touched.
    for template in ("Offset: {}", "Block Size: {}"):
        (field_value,) = struct.unpack(">I", aiff_file.read(4))
        print_with_condition(display, template.format(field_value))

    # Skip the audio data: 8 bytes of the chunk were consumed above.
    # Whence 1 means "relative to the current position".
    remaining_bytes = chunk_size - 8
    aiff_file.seek(remaining_bytes, 1)
    return False
def analyze_comm_chunk(self, chunk_size, aiff_file, display, is_aifc):
    """Read and validate the COMM (common) chunk of an AIFF / AIFF-C file.

    Reads ``chunk_size`` bytes from the current file position and reports
    the number of channels, number of frames, bits per sample and sample
    rate. For AIFF-C files the compression type and compression name are
    reported as well, if present and decodable.

    Args:
        chunk_size: Size of the COMM chunk as stated in its chunk header.
        aiff_file: Binary file object positioned at the chunk data.
        display: Flag forwarded to the print helpers (presumably toggles
            console output - confirm against print_with_condition).
        is_aifc: True to apply the AIFF-C rules (at least 22 bytes), False
            to apply the AIFF rules (exactly 18 bytes).

    Returns:
        True if at least one check failed, otherwise False.
    """
    found_error = False
    print_with_condition(
        display, "Reading COMM chunk (size: {})".format(chunk_size))
    comm_chunk_bytes = aiff_file.read(chunk_size)

    if is_aifc:
        # AIFC file
        if chunk_size < 22:
            error_with_condition(
                display,
                "Expected chunk size of COMM chunk to be at least 22, but was: {}"
                .format(chunk_size))
            found_error = True
    else:
        # AIFF file
        if chunk_size != 18:
            error_with_condition(
                display,
                "Expected chunk size of COMM chunk to be 18, but was: {}".
                format(chunk_size))
            found_error = True

    if len(comm_chunk_bytes) < 18:
        # The fixed fields below occupy 18 bytes. Decoding a shorter chunk
        # (too small chunk size or truncated file) would raise struct.error.
        error_with_condition(
            display,
            "COMM chunk contains only {} bytes, but 18 bytes are required to read its fields."
            .format(len(comm_chunk_bytes)))
        return True

    # Big-endian: unsigned 16-bit channels, 32-bit frames, 16-bit sample size.
    num_channels, num_frames, bits_per_sample = struct.unpack(
        ">HIH", comm_chunk_bytes[:8])

    print_with_condition(display,
                         "Number of Channels: {}".format(num_channels))
    if num_channels < 1:
        error_with_condition(display, "Number of channels in invalid.")
        found_error = True

    print_with_condition(display, "Number of Frames: {}".format(num_frames))
    # Validate the frame count itself (this check used to test num_channels).
    if num_frames < 1:
        error_with_condition(display, "Number of frames in invalid.")
        found_error = True

    print_with_condition(display,
                         "Bits per Sample: {}".format(bits_per_sample))
    if bits_per_sample < 1:
        error_with_condition(display, "Bits per sample value is invalid.")
        found_error = True

    # The sample rate is stored in 10 bytes and decoded by a helper.
    sample_rate = self.decode_float80(comm_chunk_bytes[8:18])
    print_with_condition(display, "Sample Rate: {}".format(sample_rate))
    if sample_rate < 1:
        error_with_condition(display, "Sample rate is invalid.")
        found_error = True

    if is_aifc:
        # compression type and compression name are only available in AIFF-C
        if chunk_size >= 22:
            compression_type_bytes = comm_chunk_bytes[18:22]
            if self.is_decodable(compression_type_bytes):
                print_with_condition(
                    display, "Compression Type: {}".format(
                        self.decode_bytes(compression_type_bytes)))
        if chunk_size > 22:
            compression_name_bytes = comm_chunk_bytes[22:]
            if self.is_decodable(compression_name_bytes):
                print_with_condition(
                    display, "Compression Name: {}".format(
                        self.decode_bytes(compression_name_bytes)))
    return found_error
def analyze_aiff_header(self, path, display=True):
    """Analyze the FORM header and all chunks of an AIFF / AIFF-C file.

    Checks the 12-byte FORM header (magic bytes, chunk size, format name)
    and then walks through the chunks of the file, handing each one to
    ``analyze_aiff_chunk``. Unlike a single early exit, header problems are
    collected and the analysis continues where possible.

    Args:
        path: Path of the file to analyze.
        display: Flag forwarded to the print helpers (presumably toggles
            console output - confirm against print_with_condition).

    Returns:
        True if at least one error was found, otherwise False. A mismatch
        between the FORM chunk size and the file size is only a warning.
    """
    found_error = False
    file_name = os.path.basename(path)
    num_bytes = os.path.getsize(path)
    print_with_condition(
        display,
        "Displaying AIFF File Header Data for File {}".format(file_name))
    print_with_condition(display, "Number of Bytes: {}".format(num_bytes))
    if num_bytes < 12:
        print_with_condition(
            display,
            "File is only {} bytes long and therefore can not contain an AIFF header."
            .format(num_bytes))
        # Stop here: decoding fields from an incomplete header would raise
        # struct.error instead of reporting the problem.
        return True
    print_with_condition(display, "Reading AIFF Header...")
    with open(path, "rb") as aiff_file:
        form_chunk_bytes = aiff_file.read(12)
        if form_chunk_bytes[:4] != b"FORM":
            error_with_condition(
                display,
                "File does not start with 'FORM' and therefore does not contain a correct AIFF file header."
            )
            found_error = True

        # Bytes 4-8: big-endian size of the FORM chunk.
        chunk_size = struct.unpack(">I", form_chunk_bytes[4:8])[0]
        print_with_condition(display, "Chunk Size: {}".format(chunk_size))
        expected_chunk_size = num_bytes - 8
        if chunk_size != expected_chunk_size:
            warning_with_condition(
                display,
                "Chunk size does not match file size. Should be equal to total number of bytes - 8 = {}, but was: {} (difference: {})"
                .format(expected_chunk_size, chunk_size,
                        abs(expected_chunk_size - chunk_size)))

        format_name_bytes = form_chunk_bytes[8:12]
        if self.is_decodable(format_name_bytes):
            print_with_condition(
                display,
                "Format: {}".format(self.decode_bytes(format_name_bytes)))
        else:
            error_with_condition(
                display,
                "Invalid (non-printable) format name encountered (byte sequence {})."
                .format(format_name_bytes))
            found_error = True

        is_aiff = format_name_bytes == b"AIFF"
        is_aifc = format_name_bytes == b"AIFC"
        if not (is_aifc or is_aiff):
            error_with_condition(
                display, "Bytes 8-12 do neither contain 'AIFF' nor 'AIFC'")
            found_error = True

        while aiff_file.tell() < num_bytes:
            chunk_header = aiff_file.read(8)
            if len(chunk_header) < 8:
                # Trailing bytes that cannot form a chunk header (name +
                # size); decoding them would raise struct.error.
                error_with_condition(
                    display,
                    "Incomplete chunk header encountered ({} of 8 bytes). Aborting analysis."
                    .format(len(chunk_header)))
                found_error = True
                break
            chunk_name_bytes = chunk_header[:4]
            if not self.is_decodable(chunk_name_bytes):
                found_error = True
                error_with_condition(
                    display,
                    "Invalid (non-printable) chunk name encountered (byte sequence {}). Aborting analysis."
                    .format(chunk_name_bytes))
                break
            chunk_size = struct.unpack(">I", chunk_header[4:8])[0]
            current_position = aiff_file.tell()
            if self.analyze_aiff_chunk(chunk_name_bytes, chunk_size,
                                       aiff_file, display, is_aifc):
                found_error = True
            if aiff_file.tell() == current_position:
                # The chunk handler left the file position untouched (e.g. a
                # skipped chunk of size 0); stop instead of continuing.
                print_error(
                    "No bytes consumed while processing '{}' chunk.".
                    format(self.decode_bytes(chunk_name_bytes)))
                break
    return found_error
def analyze_fmt_chunk(self, chunk_size, wave_file, display):
    """Read and validate the fmt chunk of a WAVE file.

    Reads ``chunk_size`` bytes from the current file position, reports the
    audio format, number of channels, sample rate, byte rate, block align
    and bits per sample, and cross-checks block align and byte rate against
    the values computed from the other fields.

    Args:
        chunk_size: Size of the fmt chunk as stated in its chunk header.
        wave_file: Binary file object positioned at the chunk data.
        display: Flag forwarded to the print helpers (presumably toggles
            console output - confirm against print_with_condition).

    Returns:
        True if at least one check failed, otherwise False.
    """
    found_error = False
    fmt_chunk_bytes = wave_file.read(chunk_size)
    print_with_condition(display,
                         "Reading fmt chunk (size: {})".format(chunk_size))
    if chunk_size != 16:
        error_with_condition(display, "fmt chunk size is not equal to 16.")
        found_error = True

    if len(fmt_chunk_bytes) < 16:
        # The fields below occupy 16 bytes. Decoding a shorter chunk (too
        # small chunk size or truncated file) would raise struct.error.
        error_with_condition(
            display,
            "fmt chunk contains only {} bytes, but 16 bytes are required to read its fields."
            .format(len(fmt_chunk_bytes)))
        return True

    # Little-endian layout of the first 16 bytes: audio format (H),
    # channels (H), sample rate (I), byte rate (I), block align (H),
    # bits per sample (H).
    (audio_format, num_channels, sample_rate, byte_rate, block_align,
     bits_per_sample) = struct.unpack("<HHIIHH", fmt_chunk_bytes[:16])

    print_with_condition(display, "Audio Format: {}".format(audio_format))
    if audio_format != 1:
        error_with_condition(display, "Audio format is not equal to 1.")
        found_error = True

    print_with_condition(display,
                         "Number of Channels: {}".format(num_channels))
    if num_channels < 1:
        error_with_condition(display, "Number of channels in invalid.")
        found_error = True

    print_with_condition(display, "Sample Rate: {}".format(sample_rate))
    if sample_rate < 1:
        error_with_condition(display, "Sample rate is invalid.")
        found_error = True

    print_with_condition(
        display,
        "Byte Rate (number of bytes per second): {}".format(byte_rate))
    if byte_rate < 1:
        error_with_condition(display, "Byte rate is invalid.")
        found_error = True

    print_with_condition(
        display,
        "Bytes per Sample in all Channels (Block Align): {}".format(
            block_align))
    if block_align < 1:
        error_with_condition(display, "Block align in invalid.")
        found_error = True

    print_with_condition(display,
                         "Bits per Sample: {}".format(bits_per_sample))
    if bits_per_sample < 1:
        error_with_condition(display, "Bits per sample value is invalid.")
        found_error = True

    # Cross-check the redundant fields against each other.
    computed_block_align = num_channels * bits_per_sample / 8
    if block_align != computed_block_align:
        error_with_condition(
            display,
            "Block align should be equal to number of channels * bits per sample / 8 = {}, but is: {} (difference: {})"
            .format(computed_block_align, block_align,
                    abs(computed_block_align - block_align)))
        found_error = True

    computed_byte_rate = sample_rate * computed_block_align
    if byte_rate != computed_byte_rate:
        error_with_condition(
            display,
            "Byte rate should be equal to sample rate * number of channels * bits per sample / 8 = {}, but is: {} (difference: {})"
            .format(computed_byte_rate, byte_rate,
                    abs(computed_byte_rate - byte_rate)))
        found_error = True
    return found_error
def analyze_wave_header(self, path, display=True):
    """Analyze the RIFF header and the chunks of a WAVE file.

    Checks the 12-byte RIFF header (magic bytes, chunk size, 'WAVE' marker)
    and then walks through the chunks of the file, handing each one to
    ``analyze_wave_chunk``. The analysis stops at the first error and after
    the data chunk.

    Args:
        path: Path of the file to analyze.
        display: Flag forwarded to the print helpers (presumably toggles
            console output - confirm against print_with_condition).

    Returns:
        True if an error was found, otherwise False. A mismatch between the
        RIFF chunk size and the file size is only reported as a warning.

    Raises:
        RuntimeError: If processing a chunk did not advance the file
            position.
    """
    file_name = os.path.basename(path)
    num_bytes = os.path.getsize(path)
    print_with_condition(
        display,
        "Displaying WAVE File Header Data for File {}".format(file_name))
    print_with_condition(display, "Number of Bytes: {}".format(num_bytes))
    if num_bytes < 12:
        print_with_condition(
            display,
            "File is only {} bytes long and therefore can not contain a WAVE file header."
            .format(num_bytes))
        return True
    print_with_condition(display, "Reading WAVE Header...")
    with open(path, "rb") as wave_file:
        header_bytes = wave_file.read(12)
        if header_bytes[:4] != b"RIFF":
            error_with_condition(
                display,
                "File does not start with 'RIFF' and therefore does not contain a correct wave file header."
            )
            return True

        # Bytes 4-8: little-endian size of the RIFF chunk.
        chunk_size = struct.unpack("<I", header_bytes[4:8])[0]
        print_with_condition(display, "Chunk Size: {}".format(chunk_size))
        expected_chunk_size = num_bytes - 8
        if chunk_size != expected_chunk_size:
            warning_with_condition(
                display,
                "Chunk size does not match file size. Should be equal to total number of bytes - 8 = {}, but was: {} (difference: {})"
                .format(expected_chunk_size, chunk_size,
                        abs(expected_chunk_size - chunk_size)))

        if header_bytes[8:12] != b"WAVE":
            error_with_condition(display, "Bytes 8-12 do not contain 'WAVE'")
            return True

        while wave_file.tell() < num_bytes:
            chunk_header = wave_file.read(8)
            if len(chunk_header) < 8:
                # Trailing bytes that cannot form a chunk header (name +
                # size); decoding them would raise struct.error.
                error_with_condition(
                    display,
                    "Incomplete chunk header encountered ({} of 8 bytes). Aborting analysis."
                    .format(len(chunk_header)))
                return True
            chunk_name_bytes = chunk_header[:4]
            if not self.is_decodable(chunk_name_bytes):
                error_with_condition(
                    display,
                    "Invalid (non-printable) chunk name encountered (byte sequence {}). Aborting analysis."
                    .format(chunk_name_bytes))
                return True
            chunk_size = struct.unpack("<I", chunk_header[4:8])[0]
            current_position = wave_file.tell()
            if self.analyze_wave_chunk(chunk_name_bytes, chunk_size,
                                       wave_file, num_bytes, display):
                return True
            if wave_file.tell() == current_position:
                raise RuntimeError(
                    "No bytes consumed while processing '{}' chunk.".
                    format(self.decode_bytes(chunk_name_bytes)))
            if chunk_name_bytes == b'data':
                # skip remaining parts of the file in case the data chunk is not correct
                # otherwise this may lead to follow-up errors
                break
    return False