# Example no. 1
    def __init__(self, settings=None, cmdline=False):
        """Build the merged configuration for this instance.

        Starts from a fresh ConfigSettings, overlays the optional
        *settings* mapping when given, then lets parse_config fold in
        command-line arguments when *cmdline* is true.
        """
        object.__init__(self)

        merged = ConfigSettings()
        if settings:
            merged.update(settings)
        self._settings = parse_config(merged, cmdline)
# Example no. 2
class AppHashDB(object):
    """Command-line front-end around a file-hash database.

    Dispatches the sub-commands ``hash``, ``match``, ``view``, ``query``
    and ``schema`` (see ``run()``) against the configured hash database.
    """

    # Class-level default settings; empty here — presumably merged into the
    # effective configuration elsewhere (TODO confirm against ConfigSettings).
    _defaults = {}

    def __init__(self, settings=None, cmdline=False):
        """Initialise the application configuration.

        A fresh ConfigSettings object is created, optionally updated from
        the *settings* mapping, and finally passed through parse_config
        (which consults the command line when *cmdline* is true).
        """
        object.__init__(self)

        cfg = ConfigSettings()
        if settings:
            cfg.update(settings)
        self._settings = parse_config(cfg, cmdline)

    @property
    def settings(self):
        return self._settings

    def run(self):
        if self.settings.cmd == "hash":
            return self.run_hash()
        elif self.settings.cmd == "match":
            return self.run_match()
        elif self.settings.cmd == "view":
            return self.run_view()
        elif self.settings.cmd == "query":
            return self.run_query()
        elif self.settings.cmd == "schema":
            return self.run_schema()

    def run_schema(self):
        """Print every line produced by schema() at the default log level."""
        for line in self.schema():
            log.default(line)

    def schema(self):
        """Yield a textual dump of the database schema.

        Emits a comment listing the user tables, then each table's CREATE
        statement, then the attached database list.  Yields nothing when
        the database cannot be opened.
        """
        # Setup logging
        log.setLevel(self.settings.verbosity)

        # Display configuration
        log.debug("%s" % self.settings)

        # Setup database
        log.debug("* setup database's...")
        db = HashDatabase2(self.settings.database)
        if not db.open():
            return

        # Fetch the user-table rows once instead of running the identical
        # sqlite_master query twice (the substr() filter skips the
        # sqlite_-internal tables).
        tables = list(
            db.connection.execute(
                "SELECT name,sql FROM sqlite_master WHERE (type='table') AND (substr(name,1,7) <> 'sqlite_') ORDER BY name"
            )
        )

        yield "-- [Tables]"
        yield "-- " + ", ".join(row["name"] for row in tables)
        for row in tables:
            yield row["sql"]
        yield "-- [Databases]"
        for row in db.connection.execute("PRAGMA database_list"):
            yield "-- %s: %s" % (row[1], row[2])

    def run_match(self):
        """Report each hashed target followed by its sorted duplicate paths."""
        for target, target_data, matches, db in self.match():
            log.default("* %s" % target.user)
            log.default("  %s" % target_data.path)
            # Sort in place: the same list object is visible to match().
            matches.sort()
            for m in matches:
                if m.is_remote:
                    log.default("  %s:%s" % (m.source, m.path))
                else:
                    log.default("  %s" % m.path)

    def run_view(self):
        """Pretty-print the rows produced by view().

        Output format per row: hash + mark flag, localized timestamp,
        right-aligned grouped size, path.
        """
        try:
            # Prefer the locale's date/time format, e.g. '%a %d %b %Y %T %Z'.
            timeformat = locale.nl_langinfo(locale.D_T_FMT)
        except Exception:
            # nl_langinfo / D_T_FMT are not available on every platform
            # (notably Windows); fall back to an ISO-like format.  (Was a
            # bare except, which also swallowed KeyboardInterrupt.)
            timeformat = "%Y-%m-%d %H:%M:%S"
        # Width of the largest expected grouped size, for column alignment.
        # locale.format was deprecated and removed in Python 3.12;
        # format_string behaves identically for a plain "%d".
        numpadd = len(locale.format_string("%d", 2 ** 31, True))
        for row, db in self.view():
            m = "*" if row.mark == 0 else " "
            h = row.hash
            s = locale.format_string("%d", row.size, True)
            t = time.strftime(timeformat, time.localtime(row.time))
            p = row.path
            log.default(
                # hash:mark  time  size  path
                "%s%s %s  %*s  %s"
                % (h, m, t, numpadd, s, p)
            )

    def view(self):
        """Yield (HashRowData, db) tuples for rows matching the configured targets.

        NOTE(review): this method reads self.setting_verbosity /
        setting_database / setting_combine / setting_targets /
        setting_walk_depth and calls HashDatabase / display_settings,
        while the sibling methods (schema, hash) read self.settings.* and
        use HashDatabase2 — this looks like an older revision; confirm
        these attributes/helpers actually exist before relying on it.
        """
        # Setup logging
        log.setLevel(self.setting_verbosity)

        # Display configuration
        display_settings(self.settings, log.debug)

        # Setup database
        log.debug("* setup database's...")
        db = HashDatabase(self.setting_database)
        db.add_combines(self.setting_combine)
        if not db.open():
            # Database unavailable: yield nothing.
            return

        # Read mounts (for truepath)
        mounts = MountEntries()

        # Build a query string, filtering on targets
        targets = [mounts.truepath(t) for t in self.setting_targets]
        qfilters = []
        qargmap = {}
        # A root target ('/', '\', '//', '\\') would match everything, so in
        # that case no path filter is added at all.
        if ("/" not in targets) and ("\\" not in targets) and ("//" not in targets) and ("\\\\" not in targets):
            for i, target in enumerate(targets):
                target = mounts.truepath(target)
                # Match the exact path, or any path beneath it (prefix
                # followed by '/'), via named parameters :tNN / :tNN_len.
                qfilters.append(
                    r"""(path = :%(name)s) OR (substr(path, 1, :%(name)s_len + 1) = :%(name)s || '/')"""
                    % {"name": "t%02d" % i}
                )
                qargmap.update({"t%02d" % i: target, "t%02d_len" % i: len(target)})
        qfilter = (r"""WHERE """ + r""" OR """.join(qfilters)) if len(qfilters) != 0 else r""""""
        # Ordering: plain path order when setting_walk_depth is set,
        # otherwise component count first; for equal paths the highest
        # mark sorts first (mark DESC).
        qorder = (
            r"""
            ORDER BY
                path,
                mark DESC
        """
            if self.setting_walk_depth
            else r"""
            ORDER BY
                count_components(path),
                path,
                mark DESC
        """
        )

        query = (
            r"""
            SELECT
                *
            FROM
                combinedtab
        """
            + qfilter
            + qorder
        )

        # yield all results as a HashRowData blob (don't expose the underlying row)
        for row in db.connection.execute(query, qargmap):
            yield (
                HashRowData(path=row["path"], hash=row["hash"], mark=row["mark"], time=row["time"], size=row["size"]),
                db,
            )

    def run_hash(self):
        """Print 'hash  path' for every target hashed by hash()."""
        for walk_target, digest, _db in self.hash():
            log.default("%s  %s" % (digest, walk_target.user))

    def hash(self, targets=None):
        """Walk *targets* (default: configured hash targets) and yield hashes.

        For every file reached by the walker, the hash is looked up in the
        database (keyed on true path + stat); when missing it is computed
        with build_hash() and stored.  Yields (target, hash, db) tuples;
        targets whose hash cannot be determined are silently skipped.
        Database changes are committed in batches, with a final commit
        guaranteed on exit.
        """
        # Setup logging
        log.setLevel(self.settings.verbosity)

        # Display configuration
        log.debug("%s" % self.settings)

        # Setup database
        log.debug("* setup database's...")
        db = HashDatabase2(self.settings.database)
        db.extend_locals(self.settings.databases_locals)
        if not db.open():
            return

        # Setup the walker
        log.debug("* setup walker...")
        walker = Walker()
        walker.walk_depth = self.settings.walk_depth
        walker.extend_targets(targets or self.settings.hash_targets)
        walker.extend_skip_fstypes(self.settings.skip_fstypes)
        walker.extend_skip_paths(self.settings.skip_paths)
        walker.extend_skip_names(self.settings.skip_names)
        walker.extend_skip_dirnames(self.settings.skip_dirnames)
        walker.extend_skip_filenames(self.settings.skip_filenames)
        walker.skip_mounts = self.settings.skip_mounts
        walker.skip_binds = self.settings.skip_binds
        walker.skip_symlinks = self.settings.skip_symlinks

        log.debug("* walk...")

        try:
            start_time = time.time()
            start_changes = db.connection.total_changes
            for target in walker.walk():
                # Use the cached hash when the stat info still matches;
                # otherwise (re)compute and store it.
                target_hash = db.path_hash(target.true, target.stat)
                if target_hash is None:
                    target_hash = build_hash(target)
                    if target_hash is not None:
                        db.path_setstat(target.true, target.stat, target_hash)
                if target_hash is not None:
                    yield (target, target_hash, db)

                # Only commit every so often since we are limited by disk
                # speed.  The since-last-commit counters are reset only
                # after an actual commit; previously they were reset on
                # every iteration, so the thresholds only ever measured a
                # single file and the batched commit effectively never
                # fired.
                now = time.time()
                if ((now - start_time) >= THRESHHOLD_TIMEDELTA) or (
                    (db.connection.total_changes - start_changes) > THRESHHOLD_CHANGEDELTA
                ):
                    log.debug("* committing changes...")
                    db.connection.commit()
                    start_time = time.time()
                    start_changes = db.connection.total_changes
        finally:
            # Always flush outstanding changes, even on error or when the
            # consumer abandons the generator early.
            log.debug("* committing changes...")
            db.connection.commit()

    def match(self):
        # Record matches (hash,size) pairs
        matches_done = set()

        # Iterate over targets
        try:
            for target, hash, db in self.hash(targets=self.settings.match_targets):
                matches = []

                # Find row data
                target_data = db.path_get_prime(target.true)
                if not target_data:
                    log.debug(PREFIX_SKIP + "%r (unable to find data)" % target.user)
                    continue

                # Already reported?
                if (target_data.hash, target_data.size) in matches_done:
                    log.debug(PREFIX_SKIP + "%r (already reported match)" % target.user)
                    continue
                matches_done.add((target_data.hash, target_data.size))

                # Search for duplicates
                for match_data in db.path_select_duplicates(
                    path=target_data.path, hash=target_data.hash, size=target_data.size
                ):
                    if not match_data.is_remote:
                        # local
                        if self.settings.match_verify:
                            try:
                                match_stat = os.lstat(match_data.path)
                            except OSError, ex:
                                log.debug(PREFIX_SKIP + "%r (unable to lstat match): %s" % (match_data.path, ex))
                                continue
                            except IOError, ex:
                                log.debug(PREFIX_SKIP + "%r (unable to lstat match): %s" % (match_data.path, ex))
                                continue

                            if not stat.S_ISREG(match_stat.st_mode):
                                log.debug(PREFIX_SKIP + "%r (not a regular file)" % match_data.path)
                                continue  # No longer a regular file

                            # Verify/update hash
                            match_hash = db.path_hash(match_data.path, match_stat)
                            if match_hash == None:
                                match_hash = build_hash(Walker.Target(match_data.path, match_data.path, match_stat))
                                if match_hash != None:
                                    db.path_setstat(match_data.path, match_stat, match_hash)
                            if match_hash == None:
                                log.debug(PREFIX_SKIP + "%r (unable to determine hash)" % match_data.path)
                                continue

                            # update data
                            match_data = match_data._replace(
                                hash=match_hash, size=match_stat.st_size, time=match_stat.st_mtime, mark=db.mark
                            )
                            if (match_data.hash != target_data.hash) or (match_data.size != target_data.size):
                                log.debug(PREFIX_SKIP + "%r (files no longer match)" % match_data.path)
                                continue  # skip if its no longer identical

                        log.verbose("comp %s" % match_data.path)
                    else: