コード例 #1
0
    def ListDescendantPathInfos(self,
                                client_id,
                                path_type,
                                components,
                                timestamp=None,
                                max_depth=None):
        """Lists path info records for the descendants of a given path.

        Args:
          client_id: Client whose records are considered; compared against the
            first element of every key of `self.path_records`.
          path_type: Path type to match; compared against the second element
            of every record key.
          components: Sequence of components of the base directory. An empty
            sequence denotes the root.
          timestamp: Forwarded to `GetPathInfo` of every matching record. If
            `None`, every matching descendant is returned. Otherwise only
            explicit ones are returned: those that carry a stat or hash entry
            themselves or that have a descendant (among the matches) which
            does.
          max_depth: If not `None`, descendants lying more than this many
            levels below the base directory are skipped.

        Returns:
          A list of path infos ordered by their components.

        Raises:
          db.NotDirectoryPathError: If the base path has a record that is not
            marked as a directory.
          db.UnknownPathError: If `components` is non-empty and no record
            exists for the base path.
        """
        # Loop invariants, computed once instead of once per stored record.
        base_components = tuple(components)
        base_depth = len(components)

        result = []
        root_dir_exists = False

        for path_idx, path_record in self.path_records.items():
            other_client_id, other_path_type, other_components = path_idx

            # Reject foreign records before doing any per-record work: path
            # infos are materialized only for records that are really needed.
            if client_id != other_client_id or path_type != other_path_type:
                continue

            if other_components == base_components:
                root_dir_exists = True
                base_info = path_record.GetPathInfo(timestamp=timestamp)
                if not base_info.directory:
                    raise db.NotDirectoryPathError(client_id, path_type,
                                                   components)
                # The base directory is not its own descendant.
                continue

            if len(other_components) == base_depth:
                continue
            if not collection.StartsWith(other_components, components):
                continue
            if (max_depth is not None
                    and len(other_components) - base_depth > max_depth):
                continue

            result.append(path_record.GetPathInfo(timestamp=timestamp))

        if not root_dir_exists and components:
            raise db.UnknownPathError(client_id, path_type, components)

        if timestamp is None:
            return sorted(result,
                          key=lambda path_info: tuple(path_info.components))

        # We need to filter implicit path infos if specific timestamp is given.

        # TODO(hanuszczak): If we were to switch to use path trie instead of storing
        # records by path id, everything would be much easier.

        class TrieNode(object):
            """A trie of path components with path infos as values."""

            def __init__(self):
                # Path info stored at this exact path (if any was added).
                self.path_info = None
                # Maps a path component to the child node for that component.
                self.children = {}
                # Whether this node or anything added below it has a stat or
                # hash entry.
                self.explicit = False

            def Add(self, path_info, idx=0):
                """Adds given path info to the trie (or one of its subtrees)."""
                components = path_info.components
                if idx == len(components):
                    self.path_info = path_info
                    self.explicit |= (path_info.HasField("stat_entry")
                                      or path_info.HasField("hash_entry"))
                else:
                    child = self.children.setdefault(components[idx],
                                                     TrieNode())
                    child.Add(path_info, idx=idx + 1)
                    # Explicitness propagates from descendants to ancestors.
                    self.explicit |= child.explicit

            def Collect(self, path_infos):
                """Appends explicit path infos in pre-order, children sorted."""
                if self.path_info is not None and self.explicit:
                    path_infos.append(self.path_info)

                for component in sorted(self.children):
                    self.children[component].Collect(path_infos)

        trie = TrieNode()
        for path_info in result:
            trie.Add(path_info)

        explicit_path_infos = []
        trie.Collect(explicit_path_infos)
        return explicit_path_infos
コード例 #2
0
ファイル: mysql_paths.py プロジェクト: threatintel-c/grr
    def ListDescendantPathInfos(self,
                                client_id,
                                path_type,
                                components,
                                timestamp=None,
                                max_depth=None,
                                cursor=None):
        """Lists path info records for the descendants of a given path.

        Args:
          client_id: Client whose paths are queried.
          path_type: Type of the queried paths; also stored in every returned
            path info.
          components: Sequence of components of the base directory.
          timestamp: If `None`, the stat and hash entries joined in are the
            ones referenced by the path's last-entry timestamps and every
            descendant is returned. Otherwise the most recent entries not
            newer than `timestamp` are joined and only explicit paths are
            returned.
          max_depth: If not `None`, rows whose `depth` column exceeds
            `len(components) + max_depth` are excluded.
          cursor: Database cursor used to run the query. NOTE(review): the
            default is `None` yet the cursor is used unconditionally, so it
            is presumably injected by a decorator outside this view; confirm.

        Returns:
          A list of path infos ordered by their components.

        Raises:
          db.UnknownPathError: If `components` is non-empty and the query
            yields no rows.
          db.NotDirectoryPathError: If the first row (in component order) is
            not marked as a directory.
        """
        base_path = mysql_utils.ComponentsToPath(components)
        escaped_base_path = db_utils.EscapeWildcards(
            db_utils.EscapeBackslashes(base_path))
        params = {
            "client_id": db_utils.ClientIDToInt(client_id),
            "path_type": int(path_type),
            "escaped_path": escaped_base_path,
            "path": base_path,
        }
        only_explicit = timestamp is not None

        # The statement is assembled from fragments and joined at the end.
        query_parts = ["""
    SELECT path, directory, UNIX_TIMESTAMP(p.timestamp),
           stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
           hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths AS p
    """]

        if only_explicit:
            # Join the newest stat/hash entries that are not newer than the
            # requested timestamp.
            query_parts.append("""
      LEFT JOIN (SELECT sr.client_id, sr.path_type, sr.path_id, sr.stat_entry
                   FROM client_path_stat_entries AS sr
             INNER JOIN (SELECT client_id, path_type, path_id,
                                MAX(timestamp) AS max_timestamp
                           FROM client_path_stat_entries
                          WHERE UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
                       GROUP BY client_id, path_type, path_id) AS st
                     ON sr.client_id = st.client_id
                    AND sr.path_type = st.path_type
                    AND sr.path_id = st.path_id
                    AND sr.timestamp = st.max_timestamp) AS s
             ON (p.client_id = s.client_id AND
                 p.path_type = s.path_type AND
                 p.path_id = s.path_id)
      LEFT JOIN (SELECT hr.client_id, hr.path_type, hr.path_id, hr.hash_entry
                   FROM client_path_hash_entries AS hr
             INNER JOIN (SELECT client_id, path_type, path_id,
                                MAX(timestamp) AS max_timestamp
                           FROM client_path_hash_entries
                          WHERE UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
                       GROUP BY client_id, path_type, path_id) AS ht
                     ON hr.client_id = ht.client_id
                    AND hr.path_type = ht.path_type
                    AND hr.path_id = ht.path_id
                    AND hr.timestamp = ht.max_timestamp) AS h
             ON (p.client_id = h.client_id AND
                 p.path_type = h.path_type AND
                 p.path_id = h.path_id)
      """)
            params["timestamp"] = mysql_utils.RDFDatetimeToTimestamp(
                timestamp)
        else:
            # Join the entries referenced by the path's last-entry timestamps.
            query_parts.append("""
      LEFT JOIN client_path_stat_entries AS s ON
                (p.client_id = s.client_id AND
                 p.path_type = s.path_type AND
                 p.path_id = s.path_id AND
                 p.last_stat_entry_timestamp = s.timestamp)
      LEFT JOIN client_path_hash_entries AS h ON
                (p.client_id = h.client_id AND
                 p.path_type = h.path_type AND
                 p.path_id = h.path_id AND
                 p.last_hash_entry_timestamp = h.timestamp)
      """)

        # Select the base path itself together with everything below it.
        query_parts.append("""
    WHERE p.client_id = %(client_id)s
      AND p.path_type = %(path_type)s
      AND (path LIKE CONCAT(%(escaped_path)s, '/%%') OR path = %(path)s)
    """)

        if max_depth is not None:
            query_parts.append("""
      AND depth <= %(depth)s
      """)
            params["depth"] = len(components) + max_depth

        cursor.execute("".join(query_parts), params)

        to_datetime = mysql_utils.TimestampToRDFDatetime
        path_infos = []
        for row in cursor.fetchall():
            # pyformat: disable
            (row_path, is_directory, row_timestamp, stat_blob,
             stat_timestamp, hash_blob, hash_timestamp) = row
            # pyformat: enable

            row_components = mysql_utils.PathToComponents(row_path)

            # A NULL column means the LEFT JOIN found no matching entry.
            stat_entry = None
            if stat_blob is not None:
                stat_entry = rdf_client_fs.StatEntry.FromSerializedBytes(
                    stat_blob)

            hash_entry = None
            if hash_blob is not None:
                hash_entry = rdf_crypto.Hash.FromSerializedBytes(hash_blob)

            path_infos.append(
                rdf_objects.PathInfo(
                    path_type=path_type,
                    components=row_components,
                    timestamp=to_datetime(row_timestamp),
                    last_stat_entry_timestamp=to_datetime(stat_timestamp),
                    last_hash_entry_timestamp=to_datetime(hash_timestamp),
                    directory=is_directory,
                    stat_entry=stat_entry,
                    hash_entry=hash_entry))

        path_infos.sort(key=lambda info: tuple(info.components))

        # The first entry should be always the base directory itself unless it is a
        # root directory that was never collected.
        if components and not path_infos:
            raise db.UnknownPathError(client_id, path_type, components)

        if path_infos and not path_infos[0].directory:
            raise db.NotDirectoryPathError(client_id, path_type, components)

        descendants = path_infos[1:]

        if not only_explicit:
            return descendants

        # For a specific timestamp only explicit paths are reported: paths that
        # carry a stat or hash entry themselves, or that are the parent of a
        # path that has already been kept.
        kept = []
        parents_of_kept = set()

        # In component order every path follows its ancestors, so walking the
        # list backwards visits descendants before their parents.
        for candidate in reversed(descendants):
            key = tuple(candidate.components)
            is_explicit = (candidate.HasField("stat_entry")
                           or candidate.HasField("hash_entry")
                           or key in parents_of_kept)
            if not is_explicit:
                continue

            kept.append(candidate)
            parents_of_kept.add(key[:-1])

        # Paths were gathered deepest-first; restore the component order.
        kept.reverse()
        return kept