Example #1
0
def compute_digests(rootdir, paths, num_threads=None):
    """Return a dict mapping each path in ``paths`` to its ``Digest``.

    The native file names (built with ``utils.build_native_path``) are
    written to a temporary file, which is handed to the external
    ``hashdeep`` program. Its output is parsed as XML and every
    ``fileobject`` element is turned into a ``Digest(sha1, sha256)`` entry.

    Args:
        rootdir: Directory the entries of ``paths`` are relative to; it is
            normalized with ``os.path.normpath`` before use.
        paths: Sized collection of paths to hash (``len()`` is taken of it).
        num_threads: If not ``None``, passed to hashdeep via ``-j``.

    Returns:
        A dict ``{path: Digest(sha1, sha256)}`` whose keys are exactly the
        entries of ``paths``.

    Raises:
        ValueError: If hashdeep reports an unexpected hash type, a
            ``fileobject`` lacks a name or one of the digests, or the set of
            reported files does not match ``paths``.
        subprocess.CalledProcessError: If hashdeep exits with a non-zero
            status.
    """
    rootdir = os.path.normpath(rootdir)

    # First, we write the temporary file with filenames in the format
    # expected by hashdeep. mkstemp() leaves deletion to the caller, so the
    # file is removed in a ``finally`` block: it must not be leaked when
    # building the list or running hashdeep fails.
    fd, tempfilename = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as tmpfile:
            for p in paths:
                tmpfile.write(utils.build_native_path(rootdir, p))
                tmpfile.write('\n')

        # Run hashdeep -c sha1,sha256 -f tempfilename -l -d (-j num_threads)
        cmd = ['hashdeep', '-c', 'sha1,sha256', '-f', tempfilename, '-l', '-d']
        if num_threads is not None:
            cmd.extend(['-j', str(num_threads)])
        output = subprocess.check_output(cmd)
    finally:
        # Delete temporary file, on success and on failure alike.
        os.remove(tempfilename)

    # Parse output
    root = ET.fromstring(output)
    res = {}

    for fileobj in root:
        if fileobj.tag != 'fileobject':
            continue

        name = None
        sha1 = None
        sha256 = None
        for child in fileobj:
            if child.tag == 'hashdigest':
                if child.attrib['type'] == 'SHA1':
                    sha1 = child.text
                elif child.attrib['type'] == 'SHA256':
                    sha256 = child.text
                else:
                    raise ValueError('Unexpected hash type "{}".'.format(
                        child.attrib['type']))
            if child.tag == 'filename':
                name = utils.get_path_from_native_path(rootdir, child.text)
        if not name or not sha1 or not sha256:
            raise ValueError('Could not extract all required information from '
                'digest.')
        res[name] = Digest(sha1, sha256)

    # Every requested path must have been hashed, and nothing else. The
    # length comparison additionally rejects duplicate entries in ``paths``.
    keys = res.keys()
    if len(keys) != len(paths) or set(keys) != set(paths):
        raise ValueError('List of filenames returned by hashdeep does not '
            'match the input list.')

    return res
Example #2
0
def assemble_paths(rootdir, patterns):
    """Walk ``rootdir`` and classify every entry selected by ``patterns``.

    Each entry below ``rootdir`` (and the root itself, represented as
    ``os.sep``) is passed to ``pattern_decision``. Entries decided as
    ``INCLUDE`` are sorted into symlinks, regular files, directories or
    "ignored" (anything else); entries decided as ``EXCLUDE`` are skipped.
    Directories that are mount points are still classified, but the walk
    does not descend into them.

    Args:
        rootdir: Directory to traverse; normalized with ``os.path.normpath``.
        patterns: Passed through unchanged to ``pattern_decision``.

    Returns:
        ``MatchingResult(filenames, symlinks, directories, errors, ignored)``
        where ``errors`` holds the exceptions reported by ``os.walk``.

    Raises:
        ValueError: If ``pattern_decision`` returns something other than
            ``INCLUDE``/``EXCLUDE``, or the root is included but is not a
            directory.
    """
    filenames = []
    symlinks = []
    directories = []
    errors = []
    ignored = []

    def listdir_onerror(error):
        # os.walk reports directory listing failures here; collect them
        # instead of aborting the traversal.
        errors.append(error)

    rootdir = os.path.normpath(rootdir)

    # Handle root separately because the os.walk code below is not going to
    # process it.
    decision = pattern_decision(os.sep, patterns)
    if decision == INCLUDE:
        # If we want to include the directory entry, we have to find out
        # its type.
        if os.path.isdir(rootdir):
            directories.append(os.sep)
        else:
            raise ValueError('The root is not a directory, which should not happen.')
    elif decision != EXCLUDE:
        raise ValueError('Unknown file decision {}.'.format(decision))

    # Now recursively traverse the file system
    for root, dirs, files in os.walk(rootdir, topdown=True,
                                     onerror=listdir_onerror, followlinks=False):
        for f in itertools.chain(files, dirs):
            native_path = os.path.join(root, f)
            path = utils.get_path_from_native_path(rootdir, native_path)
            decision = pattern_decision(path, patterns)
            if decision == INCLUDE:
                # If we want to include the directory entry, we have to find out
                # its type. Symlinks are checked first so that links to files
                # or directories are recorded as links.
                if os.path.islink(native_path):
                    symlinks.append(path)
                elif os.path.isfile(native_path):
                    filenames.append(path)
                elif os.path.isdir(native_path):
                    directories.append(path)
                else:
                    ignored.append(path)
            elif decision != EXCLUDE:
                raise ValueError('Unknown file decision {}.'.format(decision))
        # Also, we remove all mount points from dirs so that os.walk does not
        # recurse into a different file system. The names in ``dirs`` are
        # relative to the directory currently being walked (``root``), so the
        # check must be made against ``root`` and not against ``rootdir``.
        dirs[:] = [d for d in dirs if not os.path.ismount(os.path.join(root, d))]

    return MatchingResult(filenames, symlinks, directories, errors, ignored)
Example #3
0
def scan_backup(rootdir):
    """Walk ``rootdir`` and classify every entry found below it.

    The root itself is recorded as the directory ``os.sep``. Every other
    entry is sorted into symlinks, regular files, directories or "ignored";
    the entry whose path is ``'/.go_backup'`` is always put into "ignored".
    Directories that are mount points are still classified, but the walk
    does not descend into them.

    Args:
        rootdir: Directory to traverse; normalized with ``os.path.normpath``.

    Returns:
        ``ScanResult(files, symlinks, directories, errors, ignored)`` where
        ``errors`` holds the exceptions reported by ``os.walk``.

    Raises:
        ValueError: If ``rootdir`` is not a directory.
    """
    files = []
    symlinks = []
    directories = []
    errors = []
    ignored = []

    def listdir_onerror(error):
        # os.walk reports directory listing failures here; collect them
        # instead of aborting the traversal.
        errors.append(error)

    rootdir = os.path.normpath(rootdir)
    if os.path.isdir(rootdir):
        directories.append(os.sep)
    else:
        raise ValueError('The root is not a directory, which should not happen.')

    # Now recursively traverse the file system
    for root, cur_dirs, cur_files in os.walk(rootdir,
                                             topdown=True,
                                             onerror=listdir_onerror,
                                             followlinks=False):
        for f in itertools.chain(cur_files, cur_dirs):
            native_path = os.path.join(root, f)
            path = utils.get_path_from_native_path(rootdir, native_path)
            if path == '/.go_backup':
                ignored.append(path)
            elif os.path.islink(native_path):
                # Checked before isfile/isdir so links are recorded as links.
                symlinks.append(path)
            elif os.path.isfile(native_path):
                files.append(path)
            elif os.path.isdir(native_path):
                directories.append(path)
            else:
                ignored.append(path)
        # Also, we remove all mount points from dirs so that os.walk does not
        # recurse into a different file system. The names in ``cur_dirs`` are
        # relative to the directory currently being walked (``root``), so the
        # check must be made against ``root`` and not against ``rootdir``.
        cur_dirs[:] = [d for d in cur_dirs if not os.path.ismount(os.path.join(root, d))]

    return ScanResult(files, symlinks, directories, errors, ignored)
Example #4
0
def test_get_path_from_native_path_non_normalized_rootdir():
    """A rootdir of '//foo' with native path '/foo' must raise ValueError."""
    with pytest.raises(ValueError):
        utils.get_path_from_native_path('//foo', '/foo')
Example #5
0
def test_get_path_from_native_path_non_absolute_native_path():
    """A relative native path ('foo') under rootdir '/' must raise ValueError."""
    with pytest.raises(ValueError):
        utils.get_path_from_native_path('/', 'foo')
Example #6
0
def test_get_path_from_native_path_equal_paths():
    """When the native path equals the rootdir, the result is '/'."""
    same_dir = '/home/x'
    assert utils.get_path_from_native_path(same_dir, same_dir) == '/'
Example #7
0
def test_get_path_from_native_path_both_dirs_base():
    """With both rootdir and native path equal to '/', the result is '/'."""
    converted = utils.get_path_from_native_path('/', '/')
    assert converted == '/'
Example #8
0
def test_get_path_from_native_path_base_rootdir():
    """With rootdir '/', the native path '/foo/bar' is returned unchanged."""
    expected = '/foo/bar'
    assert utils.get_path_from_native_path('/', '/foo/bar') == expected
Example #9
0
def test_get_path_from_native_path_simple_2():
    """'/foo/bar/baz' relative to rootdir '/foo' is '/bar/baz'."""
    converted = utils.get_path_from_native_path('/foo', '/foo/bar/baz')
    assert converted == '/bar/baz'
Example #10
0
def test_get_path_from_native_path_simple_1():
    """'/home/x/foo' relative to rootdir '/home/x' is '/foo'."""
    converted = utils.get_path_from_native_path('/home/x', '/home/x/foo')
    assert converted == '/foo'
Example #11
0
def test_get_path_from_native_path_out_of_dir_path_1():
    """Native path '/foobar' is not below rootdir '/foo/bar': ValueError."""
    with pytest.raises(ValueError):
        utils.get_path_from_native_path('/foo/bar', '/foobar')