Example #1
0
def crw_getendaddress(file, offset, debug_output):
    """
    Calculate the end address of a CRW file.
    
    This function must be implemented with exactly these parameters in order to use it for
    the L{FileExtractorCore}. It determines the end of a CRW file within a binary source file.
    
    When examining several examples of CRW files and consulting some documentation of this
    file format, it turned out that a kind of directory is stored right at the end of a CRW
    file. Luckily for us, some sequences in there appeared at exactly the same position within
    the example files, measured from the end of the file. These circumstances enable us to
    estimate the end of a CRW file. Not sure whether it works for all CRWs, but it worked for
    ours and recovered more than 2000 pictures.
    
    The scan slides a window of C{len(sequ)} bytes over the file, starting at the B{current read
    position} of C{file}. Whenever the window matches C{sequ}, three further byte groups at fixed
    distances from the match (C{sequ_zeros}, C{sequ1}, C{sequ2}) are verified. If one of them does
    not match, the read position is put back to where the scan left off, so that the window stays
    in sync with the file. The scan stops at the end of the file.
    
    @param file: Source file the start sequence was found in
    @type file: Reference to a file
    @param offset: Position of the start sequence within the source file. Only part of the
    required signature; it is not used, the scan begins at the current read position of C{file}.
    @type offset: C{int}
    @param debug_output: Indicates, whether to produce output to standard out.
    @type debug_output: C{Boolean}
    
    @return: -1 if the end of the file could not be determined, otherwise the offset inside the
    source file for the end of the found file measured from the beginning of the source file.
    @rtype: C{int}
    
    @attention: There is no limitation implemented for the file size of the found file. Consequently,
    this function will examine the source file from the current position up to the end, which can
    take quite a while.
    @attention: The read position of C{file} is changed by this function (it is at the end of
    the file when -1 is returned); the caller has to restore it if needed.
    @todo: Implement a break criterion and limit the file size.
    """
    if debug_output:
        print("\tEntered function in additional module manual_crw for calculating")
        print("\tend address for CRW file (depending on the file size this may last several minutes)")

    seq_len = len(sequ)
    # Additional byte groups that must follow a match of sequ:
    # (distance from the match position, number of bytes, expected bytes)
    trailer_checks = (
        (7, 5, sequ_zeros),
        (12, 2, sequ1),
        (22, 2, sequ2),
    )

    window = file.read(seq_len - 1)
    while True:
        ch = file.read(1)
        if not ch:
            # End of file: no complete window can be formed any more.
            break
        window = window + ch
        # The comparison of the first byte is a cheap pre-filter for checkString.
        if ord(window[0]) == sequ[0] and tools.checkString(window, sequ):
            resume_pos = file.tell()
            pos = resume_pos - seq_len
            for rel_offset, length, expected in trailer_checks:
                file.seek(pos + rel_offset)
                if not tools.checkString(file.read(length), expected):
                    break
            else:
                # All groups matched - the end address lies 35 bytes behind the match.
                if debug_output:
                    print("\t--- Leave function - calculated end address: 0x%x" % (pos + 35))
                return pos + 35
            # False alarm: continue scanning exactly where the window ended.
            file.seek(resume_pos)
        window = window[1:]

    if debug_output:
        print("\t--- Leave function now - no end address could be determined")
    return -1
def startSearch(status_passed):
    """
    Invokes the search on the file.
    
    Central bit of the core - examines the source file byte by byte and checks against the
    provided start sequences of files (provided by the signatures). Ones, a start sequence
    has been found, the further actions depend on the type of signature (in fact, how the end
    of the file is identified - by end sequence, file size info inside file or manual by
    additional module).
    
    The status object is constantly updated. The frequency of updating the status instance with
    progress within a source file depends on the value in the settings instance (
    ExecutionSettings.ExecutionSettings.output_frequency) In fact, this variable says how often
    a message shall be sent to the status object in total for the current source file.
    
    @param status_passed: Reference to the status instance for applying runtime information and
    gaining settings for the running.
    @type status_passed: ExecutionSettings.ExecutionStatus
    return: Active Signatures; Overall Counter
    rtype: C{List} of C{Signatures}; C{int}
    """
    global binfile, start, skipped, size, maxlength
    global status
    status= status_passed
    st = binfile.read(maxlength-1)

    dx = size / status.settings.output_frequency           # for user output only
    x = dx                                  # same here    
    
    file_pos = binfile.tell()
    status.startedOneSourceFile(size)
    
    while 1:
        c = ''
        if file_pos < status.file_end:
            c = binfile.read(1)
        file_pos = binfile.tell()
        if c!='':               # end of file
            st += c
    
        if file_pos-status.file_start >= x:
            status.updateFineshedForCurrent(file_pos-status.file_start)
            
            if status.settings.output_level == 0:
                pass  
            elif status.settings.output_level == 3 and size!=0:
                print "Pos: 0x%x - %d / %d KB (%d %%)" %(file_pos, (file_pos-status.file_start)  / 1024 , size / 1024, (file_pos-status.file_start)*100/size)
            elif status.settings.output_level == 2:
                print "%d %%" %((file_pos-status.file_start)*100/size)
            elif status.settings.output_level == 1:
                print '#' ,
            x += dx
            
        for sig in status.settings.signatures:
            if start[sig[signatures.name]] == -1:
                if sig[signatures.start_seq][0] == ord(st[0]):
                    if checkString(st, sig[signatures.start_seq]):
                        start_pos = file_pos-len(st)
                        if status.settings.output_level == 3:
                            print ('Found start at 0x%x for %s' %(start_pos, sig[signatures.description]))
                        if sig[signatures.filesize_type] == signatures.TYPE_FILE_SIZE:
                            offsets = sig[signatures.filesize_address_offsets]
                            ofs = 0
                            for i in offsets:
                                binfile.seek(start_pos + i)
                                val = ord(binfile.read(1))
                                ofs = ofs * 256 + val
                            end_pos = start_pos + ofs
                            correction = sig[signatures.filesize_info_correction]
                            end_pos = end_pos + correction
                            writeFile(sig[signatures.name],status.counterr[sig[signatures.name]]+status.settings.counterstart_global,
                                  sig[signatures.extension],binfile, start_pos, end_pos-1,
                                  status.settings.dest_folder, status.settings.output_level == 3, status)
                            status.counter[sig[signatures.name]] += 1
                            status.counterr[sig[signatures.name]] += 1
                            status.foundFile()
                            binfile.seek(file_pos)
                        elif sig[signatures.filesize_type] == signatures.TYPE_MANUAL:
                            function = sig[signatures.filesizemanual_functionname]
                            if status.settings.output_level == 3:
                                print ('-- Enter signature defined function for end address determination for this file')
                            end_address = function(binfile, start_pos, status.settings.output_level == 3)
                            if (end_address < start_pos):
                                if status.settings.output_level == 3:
                                    print ('-- No valid end address found - skip this file.')
                                continue
                            writeFile(sig[signatures.name],status.counterr[sig[signatures.name]]+status.settings.counterstart_global,
                                  sig[signatures.extension],binfile, start_pos, end_address,
                                  status.settings.dest_folder, status.settings.output_level == 3, status)
                            status.counter[sig[signatures.name]] += 1   
                            status.counterr[sig[signatures.name]] += 1   
                            status.foundFile()
                            binfile.seek(file_pos)
                        else:
                            start[sig[signatures.name]] = start_pos
            else:
                if file_pos < start[sig[signatures.name]] + len(sig[signatures.start_seq]):
                    continue
                if sig[signatures.end_seq][0] == ord(st[0]):
                    if tools.checkString(st, sig[signatures.end_seq]):
                        end_pos = file_pos-len(st)+len(sig[signatures.end_seq])-1
                        if skipped[sig[signatures.name]] < sig[signatures.skip_end_seqs]:
                            skipped[sig[signatures.name]] +=1
                            if status.settings.output_level == 3:
                                print ('Found end at 0x%x for %s - skipped' %(end_pos, sig[signatures.description]))
                            continue
                        if status.settings.output_level == 3:
                            print ('Found end at 0x%x for %s' %(end_pos, sig[signatures.description]))
                        writeFile(sig[signatures.name],status.counterr[sig[signatures.name]]+status.settings.counterstart_global,
                                sig[signatures.extension],binfile, start[sig[signatures.name]], end_pos,
                                status.settings.dest_folder, status.settings.output_level == 3, status)
                        start[sig[signatures.name]] = -1
                        status.counter[sig[signatures.name]] += 1
                        status.counterr[sig[signatures.name]] += 1
                        status.foundFile()
                        skipped[sig[signatures.name]] = 0
            
        if len(st)==1:
            break
        st = st[1:]
    
    status.finishedOneSourceFile()
    binfile.close()
    return status.settings.signatures, status.counterr