Beispiel #1
0
    def ListDescendentPathInfos(self,
                                client_id,
                                path_type,
                                components,
                                timestamp=None,
                                max_depth=None,
                                cursor=None):
        """Fetches path info records stored below the given path.

        Args:
          client_id: Identifier of the client whose paths are queried.
          path_type: Type of the paths to look up.
          components: Components of the path whose descendants are listed.
          timestamp: If given, stat and hash entries are taken from the newest
            records that are not newer than this moment, and only paths that
            carry such an entry (or lie above a path that does) are returned.
          max_depth: If given, limits how many levels below `components` are
            included in the result.
          cursor: Database cursor used to run the query.

        Returns:
          A list of path info objects ordered by their components.
        """
        root_path = mysql_utils.ComponentsToPath(components)
        params = {
            "client_id": db_utils.ClientIDToInt(client_id),
            "path_type": int(path_type),
            "path": db_utils.EscapeWildcards(root_path),
        }

        # The statement is assembled piece by piece and glued together right
        # before execution.
        query_parts = ["""
    SELECT path, directory, UNIX_TIMESTAMP(p.timestamp),
           stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
           hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths AS p
    """]

        only_explicit = timestamp is not None
        if only_explicit:
            # Join against the newest stat/hash entry at or before the requested
            # moment.
            query_parts.append("""
      LEFT JOIN (SELECT sr.client_id, sr.path_type, sr.path_id, sr.stat_entry
                   FROM client_path_stat_entries AS sr
             INNER JOIN (SELECT client_id, path_type, path_id,
                                MAX(timestamp) AS max_timestamp
                           FROM client_path_stat_entries
                          WHERE UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
                       GROUP BY client_id, path_type, path_id) AS st
                     ON sr.client_id = st.client_id
                    AND sr.path_type = st.path_type
                    AND sr.path_id = st.path_id
                    AND sr.timestamp = st.max_timestamp) AS s
             ON (p.client_id = s.client_id AND
                 p.path_type = s.path_type AND
                 p.path_id = s.path_id)
      LEFT JOIN (SELECT hr.client_id, hr.path_type, hr.path_id, hr.hash_entry
                   FROM client_path_hash_entries AS hr
             INNER JOIN (SELECT client_id, path_type, path_id,
                                MAX(timestamp) AS max_timestamp
                           FROM client_path_hash_entries
                          WHERE UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
                       GROUP BY client_id, path_type, path_id) AS ht
                     ON hr.client_id = ht.client_id
                    AND hr.path_type = ht.path_type
                    AND hr.path_id = ht.path_id
                    AND hr.timestamp = ht.max_timestamp) AS h
             ON (p.client_id = h.client_id AND
                 p.path_type = h.path_type AND
                 p.path_id = h.path_id)
      """)
            params["timestamp"] = mysql_utils.RDFDatetimeToTimestamp(timestamp)
        else:
            # Join against the entries referenced by the `last_*_timestamp`
            # columns of the path row itself.
            query_parts.append("""
      LEFT JOIN client_path_stat_entries AS s ON
                (p.client_id = s.client_id AND
                 p.path_type = s.path_type AND
                 p.path_id = s.path_id AND
                 p.last_stat_entry_timestamp = s.timestamp)
      LEFT JOIN client_path_hash_entries AS h ON
                (p.client_id = h.client_id AND
                 p.path_type = h.path_type AND
                 p.path_id = h.path_id AND
                 p.last_hash_entry_timestamp = h.timestamp)
      """)

        query_parts.append("""
    WHERE p.client_id = %(client_id)s
      AND p.path_type = %(path_type)s
      AND path LIKE concat(%(path)s, '/%%')
    """)

        if max_depth is not None:
            query_parts.append("""
      AND depth <= %(depth)s
      """)
            params["depth"] = len(components) + max_depth

        cursor.execute("".join(query_parts), params)

        to_rdf_datetime = mysql_utils.TimestampToRDFDatetime
        found = []
        for row in cursor.fetchall():
            (row_path, is_directory, row_timestamp, stat_blob, stat_timestamp,
             hash_blob, hash_timestamp) = row

            row_components = mysql_utils.PathToComponents(row_path)

            # Missing entries come back as NULL and stay unset on the result.
            stat_entry = None
            if stat_blob is not None:
                stat_entry = rdf_client_fs.StatEntry.FromSerializedString(
                    stat_blob)

            hash_entry = None
            if hash_blob is not None:
                hash_entry = rdf_crypto.Hash.FromSerializedString(hash_blob)

            found.append(
                rdf_objects.PathInfo(
                    path_type=path_type,
                    components=row_components,
                    timestamp=to_rdf_datetime(row_timestamp),
                    last_stat_entry_timestamp=to_rdf_datetime(stat_timestamp),
                    last_hash_entry_timestamp=to_rdf_datetime(hash_timestamp),
                    directory=is_directory,
                    stat_entry=stat_entry,
                    hash_entry=hash_entry))

        found = sorted(found, key=lambda info: tuple(info.components))

        # Without a timestamp every matching path is reported.
        if not only_explicit:
            return found

        # With a timestamp only "explicit" paths are reported: those that have a
        # stat or hash entry themselves, or that are a parent of a path that was
        # already kept.
        kept = []
        parents_of_kept = set()

        # A path sorts before anything below it, so walking the sorted list
        # backwards visits deeper paths before their parents.
        for info in found[::-1]:
            key = tuple(info.components)

            has_entry = (info.HasField("stat_entry")
                         or info.HasField("hash_entry"))
            if not has_entry and key not in parents_of_kept:
                continue

            kept.append(info)
            parents_of_kept.add(key[:-1])

        # The paths were collected deepest-first; restore the sorted order.
        kept.reverse()
        return kept
Beispiel #2
0
    def _MultiWritePathInfos(self, path_infos, cursor=None):
        """Writes a collection of path info records for specified clients.

        The given records are first staged in a temporary `client_path_infos`
        table and then copied into `client_paths`, `client_path_stat_entries`
        and `client_path_hash_entries`. Every ancestor of a written path is
        upserted into `client_paths` as a directory.

        Args:
          path_infos: A mapping from client id to an iterable of path info
            objects to write for that client.
          cursor: Database cursor used to execute all the statements.
        """
        path_info_count = 0
        path_info_values = []

        parent_path_info_count = 0
        parent_path_info_values = []
        # Ancestor rows that are already queued. Paths written together usually
        # share most of their ancestors, and upserting the very same row again
        # does not change the outcome.
        seen_parent_rows = set()

        has_stat_entries = False
        has_hash_entries = False

        for client_id, client_path_infos in iteritems(path_infos):
            for path_info in client_path_infos:
                int_client_id = db_utils.ClientIDToInt(client_id)
                path = mysql_utils.ComponentsToPath(path_info.components)

                # The order of the values has to match the column list of the
                # INSERT into the temporary table below.
                path_info_values.append(int_client_id)
                path_info_values.append(int(path_info.path_type))
                path_info_values.append(path_info.GetPathID().AsBytes())
                path_info_values.append(path)
                path_info_values.append(bool(path_info.directory))
                path_info_values.append(len(path_info.components))

                if path_info.HasField("stat_entry"):
                    path_info_values.append(
                        path_info.stat_entry.SerializeToString())
                    has_stat_entries = True
                else:
                    path_info_values.append(None)
                if path_info.HasField("hash_entry"):
                    path_info_values.append(
                        path_info.hash_entry.SerializeToString())
                    path_info_values.append(
                        path_info.hash_entry.sha256.AsBytes())
                    has_hash_entries = True
                else:
                    path_info_values.append(None)
                    path_info_values.append(None)

                path_info_count += 1

                for parent_path_info in path_info.GetAncestors():
                    parent_row = (
                        int_client_id,
                        int(parent_path_info.path_type),
                        parent_path_info.GetPathID().AsBytes(),
                        mysql_utils.ComponentsToPath(
                            parent_path_info.components),
                        len(parent_path_info.components),
                    )
                    if parent_row in seen_parent_rows:
                        continue
                    seen_parent_rows.add(parent_row)

                    parent_path_info_values.extend(parent_row)
                    parent_path_info_count += 1

        with mysql_utils.TemporaryTable(
                cursor=cursor,
                name="client_path_infos",
                columns=[
                    ("client_id", "BIGINT UNSIGNED NOT NULL"),
                    ("path_type", "INT UNSIGNED NOT NULL"),
                    ("path_id", "BINARY(32) NOT NULL"),
                    ("path", "TEXT NOT NULL"),
                    ("directory", "BOOLEAN NOT NULL"),
                    ("depth", "INT NOT NULL"),
                    ("stat_entry", "MEDIUMBLOB NULL"),
                    ("hash_entry", "MEDIUMBLOB NULL"),
                    ("sha256", "BINARY(32) NULL"),
                    ("timestamp", "TIMESTAMP(6) NOT NULL DEFAULT now(6)"),
                ]):
            if path_info_count > 0:
                query = """
        INSERT INTO client_path_infos(client_id, path_type, path_id,
                                      path, directory, depth,
                                      stat_entry, hash_entry, sha256)
        VALUES {}
        """.format(mysql_utils.Placeholders(num=9, values=path_info_count))
                cursor.execute(query, path_info_values)

                # A path that was ever recorded as a directory stays one.
                cursor.execute("""
        INSERT INTO client_paths(client_id, path_type, path_id, path,
                                 directory, depth)
             SELECT client_id, path_type, path_id, path, directory, depth
               FROM client_path_infos
        ON DUPLICATE KEY UPDATE
          client_paths.directory = (client_paths.directory OR
                                    VALUES(client_paths.directory)),
          client_paths.timestamp = now(6)
        """)

            if parent_path_info_count > 0:
                placeholders = ["(%s, %s, %s, %s, TRUE, %s)"
                                ] * parent_path_info_count

                # `now(6)` keeps the fractional seconds, just like the upsert of
                # the explicit paths above; a bare `now()` would store a
                # timestamp truncated to whole seconds.
                cursor.execute(
                    """
        INSERT INTO client_paths(client_id, path_type, path_id, path,
                                 directory, depth)
        VALUES {}
        ON DUPLICATE KEY UPDATE
          directory = TRUE,
          timestamp = now(6)
        """.format(", ".join(placeholders)), parent_path_info_values)

            if has_stat_entries:
                cursor.execute("""
        INSERT INTO client_path_stat_entries(client_id, path_type, path_id,
                                             stat_entry, timestamp)
             SELECT client_id, path_type, path_id, stat_entry, timestamp
               FROM client_path_infos
              WHERE stat_entry IS NOT NULL
        """)

                # Point the path row at the stat entry that was just written.
                cursor.execute("""
        UPDATE client_paths, client_path_infos
           SET client_paths.last_stat_entry_timestamp = client_path_infos.timestamp
         WHERE client_paths.client_id = client_path_infos.client_id
           AND client_paths.path_type = client_path_infos.path_type
           AND client_paths.path_id = client_path_infos.path_id
           AND client_path_infos.stat_entry IS NOT NULL
        """)

            if has_hash_entries:
                cursor.execute("""
        INSERT INTO client_path_hash_entries(client_id, path_type, path_id,
                                             hash_entry, sha256, timestamp)
             SELECT client_id, path_type, path_id, hash_entry, sha256, timestamp
               FROM client_path_infos
              WHERE hash_entry IS NOT NULL
        """)

                # Point the path row at the hash entry that was just written.
                cursor.execute("""
        UPDATE client_paths, client_path_infos
           SET client_paths.last_hash_entry_timestamp = client_path_infos.timestamp
         WHERE client_paths.client_id = client_path_infos.client_id
           AND client_paths.path_type = client_path_infos.path_type
           AND client_paths.path_id = client_path_infos.path_id
           AND client_path_infos.hash_entry IS NOT NULL
        """)
Beispiel #3
0
    def _MultiWritePathInfos(self, path_infos, connection=None):
        """Writes a collection of path info records for specified clients.

        The given records are first staged in a temporary `client_path_infos`
        table and then copied into `client_paths`, `client_path_stat_entries`
        and `client_path_hash_entries`. Every ancestor of a written path is
        upserted into `client_paths` as a directory. The temporary table is
        dropped afterwards, whether or not the writes succeeded.

        Args:
          path_infos: A mapping from client id to an iterable of path info
            objects to write for that client.
          connection: Database connection from which the cursors are created.
        """
        path_info_count = 0
        path_info_values = []

        parent_path_info_count = 0
        parent_path_info_values = []
        # Ancestor rows that are already queued. Paths written together usually
        # share most of their ancestors, and upserting the very same row again
        # does not change the outcome.
        seen_parent_rows = set()

        has_stat_entries = False
        has_hash_entries = False

        for client_id, client_path_infos in iteritems(path_infos):
            for path_info in client_path_infos:
                int_client_id = db_utils.ClientIDToInt(client_id)
                path = mysql_utils.ComponentsToPath(path_info.components)

                # The order of the values has to match the column list of the
                # INSERT into the temporary table below.
                path_info_values.append(int_client_id)
                path_info_values.append(int(path_info.path_type))
                path_info_values.append(path_info.GetPathID().AsBytes())
                path_info_values.append(path)
                path_info_values.append(bool(path_info.directory))
                path_info_values.append(len(path_info.components))

                if path_info.HasField("stat_entry"):
                    path_info_values.append(
                        path_info.stat_entry.SerializeToString())
                    has_stat_entries = True
                else:
                    path_info_values.append(None)
                if path_info.HasField("hash_entry"):
                    path_info_values.append(
                        path_info.hash_entry.SerializeToString())
                    path_info_values.append(
                        path_info.hash_entry.sha256.AsBytes())
                    has_hash_entries = True
                else:
                    path_info_values.append(None)
                    path_info_values.append(None)

                path_info_count += 1

                for parent_path_info in path_info.GetAncestors():
                    parent_row = (
                        int_client_id,
                        int(parent_path_info.path_type),
                        parent_path_info.GetPathID().AsBytes(),
                        mysql_utils.ComponentsToPath(
                            parent_path_info.components),
                        len(parent_path_info.components),
                    )
                    if parent_row in seen_parent_rows:
                        continue
                    seen_parent_rows.add(parent_row)

                    parent_path_info_values.extend(parent_row)
                    parent_path_info_count += 1

        try:
            with contextlib.closing(connection.cursor()) as cursor:
                cursor.execute("""
        CREATE TEMPORARY TABLE client_path_infos(
          client_id BIGINT UNSIGNED NOT NULL,
          path_type INT UNSIGNED NOT NULL,
          path_id BINARY(32) NOT NULL,
          path TEXT NOT NULL,
          directory BOOLEAN NOT NULL,
          depth INT NOT NULL,
          stat_entry MEDIUMBLOB NULL,
          hash_entry MEDIUMBLOB NULL,
          sha256 BINARY(32) NULL,
          timestamp TIMESTAMP(6) NOT NULL DEFAULT now(6)
        )""")

                if path_info_count > 0:
                    cursor.execute(
                        """
          INSERT INTO client_path_infos(client_id, path_type, path_id,
                                        path, directory, depth,
                                        stat_entry, hash_entry, sha256)
          VALUES {}
          """.format(mysql_utils.Placeholders(num=9, values=path_info_count)),
                        path_info_values)

                    # A path that was ever recorded as a directory stays one.
                    cursor.execute("""
          INSERT INTO client_paths(client_id, path_type, path_id, path,
                                   directory, depth)
               SELECT client_id, path_type, path_id, path, directory, depth
                 FROM client_path_infos
          ON DUPLICATE KEY UPDATE
            client_paths.directory = client_paths.directory OR VALUES(client_paths.directory),
            client_paths.timestamp = now(6)
          """)

                if parent_path_info_count > 0:
                    placeholders = ["(%s, %s, %s, %s, TRUE, %s)"
                                    ] * parent_path_info_count

                    # `now(6)` keeps the fractional seconds, just like the
                    # upsert of the explicit paths above; a bare `now()` would
                    # store a timestamp truncated to whole seconds.
                    cursor.execute(
                        """
          INSERT INTO client_paths(client_id, path_type, path_id, path,
                                   directory, depth)
          VALUES {}
          ON DUPLICATE KEY UPDATE
            directory = TRUE,
            timestamp = now(6)
          """.format(", ".join(placeholders)), parent_path_info_values)

                if has_stat_entries:
                    cursor.execute("""
          INSERT INTO client_path_stat_entries(client_id, path_type, path_id,
                                               stat_entry, timestamp)
               SELECT client_id, path_type, path_id, stat_entry, timestamp
                 FROM client_path_infos
                WHERE stat_entry IS NOT NULL
          """)

                    # Point the path row at the stat entry just written.
                    cursor.execute("""
          UPDATE client_paths, client_path_infos
             SET client_paths.last_stat_entry_timestamp = client_path_infos.timestamp
           WHERE client_paths.client_id = client_path_infos.client_id
             AND client_paths.path_type = client_path_infos.path_type
             AND client_paths.path_id = client_path_infos.path_id
             AND client_path_infos.stat_entry IS NOT NULL
          """)

                if has_hash_entries:
                    cursor.execute("""
          INSERT INTO client_path_hash_entries(client_id, path_type, path_id,
                                               hash_entry, sha256, timestamp)
               SELECT client_id, path_type, path_id, hash_entry, sha256, timestamp
                 FROM client_path_infos
                WHERE hash_entry IS NOT NULL
          """)

                    # Point the path row at the hash entry just written.
                    cursor.execute("""
          UPDATE client_paths, client_path_infos
             SET client_paths.last_hash_entry_timestamp = client_path_infos.timestamp
           WHERE client_paths.client_id = client_path_infos.client_id
             AND client_paths.path_type = client_path_infos.path_type
             AND client_paths.path_id = client_path_infos.path_id
             AND client_path_infos.hash_entry IS NOT NULL
          """)
        finally:
            # Drop the temporary table in a separate cursor. This ensures that
            # even if the previous cursor.execute fails mid-way leaving the
            # temporary table created (as table creation can't be rolled back), the
            # table would still be correctly dropped.
            #
            # This is important since connections are reused in the MySQL connection
            # pool.
            with contextlib.closing(connection.cursor()) as cursor:
                cursor.execute(
                    "DROP TEMPORARY TABLE IF EXISTS client_path_infos")
Beispiel #4
0
    def _MultiWritePathInfos(self, path_infos, cursor=None):
        """Writes a collection of path info records for specified clients.

        Each given path is upserted into `client_paths`; its stat and hash
        entries (if present) are inserted into `client_path_stat_entries` and
        `client_path_hash_entries`, and the corresponding `last_*_timestamp`
        column of the path row is updated. Every ancestor of a written path is
        upserted into `client_paths` as a directory.

        Args:
          path_infos: A mapping from client id to an iterable of path info
            objects to write for that client.
          cursor: Database cursor used to execute all the statements.
        """
        # One timestamp is shared by every row written in this call, so it is
        # converted once rather than once per row.
        now_timestamp = mysql_utils.RDFDatetimeToTimestamp(
            rdfvalue.RDFDatetime.Now())

        path_info_values = []

        parent_path_info_values = []
        # Ancestor rows that are already queued. Paths written together usually
        # share most of their ancestors, and upserting the very same row again
        # does not change the outcome.
        seen_parent_rows = set()

        # The `*_keys` lists are flat (three values per entry) because they are
        # used as parameters of the OR-ed conditions in the UPDATE statements.
        stat_entry_keys = []
        stat_entry_values = []

        hash_entry_keys = []
        hash_entry_values = []

        for client_id, client_path_infos in iteritems(path_infos):
            for path_info in client_path_infos:
                path = mysql_utils.ComponentsToPath(path_info.components)

                int_client_id = db_utils.ClientIDToInt(client_id)
                key = (
                    int_client_id,
                    int(path_info.path_type),
                    path_info.GetPathID().AsBytes(),
                )

                path_info_values.append(key + (
                    now_timestamp, path,
                    bool(path_info.directory), len(path_info.components)))

                if path_info.HasField("stat_entry"):
                    stat_entry_keys.extend(key)
                    stat_entry_values.append(
                        key + (now_timestamp,
                               path_info.stat_entry.SerializeToBytes()))

                if path_info.HasField("hash_entry"):
                    hash_entry_keys.extend(key)
                    hash_entry_values.append(
                        key + (now_timestamp,
                               path_info.hash_entry.SerializeToBytes(),
                               path_info.hash_entry.sha256.AsBytes()))

                for parent_path_info in path_info.GetAncestors():
                    parent_row = (
                        int_client_id,
                        int(parent_path_info.path_type),
                        parent_path_info.GetPathID().AsBytes(),
                        mysql_utils.ComponentsToPath(
                            parent_path_info.components),
                        len(parent_path_info.components),
                    )
                    if parent_row in seen_parent_rows:
                        continue
                    seen_parent_rows.add(parent_row)
                    parent_path_info_values.append(parent_row)

        if path_info_values:
            # A path that was ever recorded as a directory stays one.
            query = """
        INSERT INTO client_paths(client_id, path_type, path_id,
                                 timestamp,
                                 path, directory, depth)
        VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s, %s, %s)
        ON DUPLICATE KEY UPDATE
          timestamp = VALUES(timestamp),
          directory = directory OR VALUES(directory)
      """
            cursor.executemany(query, path_info_values)

        if parent_path_info_values:
            query = """
        INSERT INTO client_paths(client_id, path_type, path_id, path,
                                 directory, depth)
        VALUES (%s, %s, %s, %s, TRUE, %s)
        ON DUPLICATE KEY UPDATE
          directory = TRUE,
          timestamp = NOW(6)
      """
            cursor.executemany(query, parent_path_info_values)

        if stat_entry_values:
            query = """
        INSERT INTO client_path_stat_entries(client_id, path_type, path_id,
                                             timestamp,
                                             stat_entry)
        VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s)
      """
            cursor.executemany(query, stat_entry_values)

            condition = "(client_id = %s AND path_type = %s AND path_id = %s)"

            # One statement updates all affected path rows: the first parameter
            # is the timestamp, followed by three key values per condition.
            query = """
        UPDATE client_paths
        SET last_stat_entry_timestamp = FROM_UNIXTIME(%s)
        WHERE {}
      """.format(" OR ".join([condition] * len(stat_entry_values)))

            params = [now_timestamp] + stat_entry_keys
            cursor.execute(query, params)

        if hash_entry_values:
            query = """
        INSERT INTO client_path_hash_entries(client_id, path_type, path_id,
                                             timestamp,
                                             hash_entry, sha256)
        VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s, %s)
      """
            cursor.executemany(query, hash_entry_values)

            condition = "(client_id = %s AND path_type = %s AND path_id = %s)"

            # Same shape as the stat entry update above.
            query = """
        UPDATE client_paths
        SET last_hash_entry_timestamp = FROM_UNIXTIME(%s)
        WHERE {}
      """.format(" OR ".join([condition] * len(hash_entry_values)))

            params = [now_timestamp] + hash_entry_keys
            cursor.execute(query, params)
Beispiel #5
0
    def WritePathInfos(
        self,
        client_id: str,
        path_infos: Sequence[rdf_objects.PathInfo],
        cursor: Optional[MySQLdb.cursors.Cursor] = None,
    ) -> None:
        """Writes a collection of path_info records for a client.

        Each given path is upserted into `client_paths`; its stat and hash
        entries (if present) are inserted into `client_path_stat_entries` and
        `client_path_hash_entries`, and the corresponding `last_*_timestamp`
        column of the path row is updated. Every ancestor of a written path is
        upserted into `client_paths` as a directory.

        Args:
          client_id: Identifier of the client the paths belong to.
          path_infos: Path info objects to write.
          cursor: Database cursor used to execute all the statements.

        Raises:
          db.UnknownClientError: If upserting the given paths into
            `client_paths` fails with an integrity error.
        """
        # One timestamp is shared by every row written in this call, so it is
        # converted once rather than once per row.
        now_timestamp = mysql_utils.RDFDatetimeToTimestamp(
            rdfvalue.RDFDatetime.Now())

        int_client_id = db_utils.ClientIDToInt(client_id)

        path_info_values = []

        parent_path_info_values = []
        # Ancestor rows that are already queued. Paths written together usually
        # share most of their ancestors, and upserting the very same row again
        # does not change the outcome.
        seen_parent_rows = set()

        # The `*_keys` lists are flat (three values per entry) because they are
        # used as parameters of the OR-ed conditions in the UPDATE statements.
        stat_entry_keys = []
        stat_entry_values = []

        hash_entry_keys = []
        hash_entry_values = []

        for path_info in path_infos:
            path = mysql_utils.ComponentsToPath(path_info.components)

            key = (
                int_client_id,
                int(path_info.path_type),
                path_info.GetPathID().AsBytes(),
            )

            path_info_values.append(key +
                                    (now_timestamp, path,
                                     bool(path_info.directory),
                                     len(path_info.components)))

            if path_info.HasField("stat_entry"):
                stat_entry_keys.extend(key)
                stat_entry_values.append(
                    key + (now_timestamp,
                           path_info.stat_entry.SerializeToBytes()))

            if path_info.HasField("hash_entry"):
                hash_entry_keys.extend(key)
                hash_entry_values.append(
                    key + (now_timestamp,
                           path_info.hash_entry.SerializeToBytes(),
                           path_info.hash_entry.sha256.AsBytes()))

            for parent_path_info in path_info.GetAncestors():
                parent_row = (
                    int_client_id,
                    int(parent_path_info.path_type),
                    parent_path_info.GetPathID().AsBytes(),
                    mysql_utils.ComponentsToPath(parent_path_info.components),
                    len(parent_path_info.components),
                )
                if parent_row in seen_parent_rows:
                    continue
                seen_parent_rows.add(parent_row)
                parent_path_info_values.append(parent_row)

        if path_info_values:
            # A path that was ever recorded as a directory stays one.
            query = """
        INSERT INTO client_paths(client_id, path_type, path_id,
                                 timestamp,
                                 path, directory, depth)
        VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s, %s, %s)
        ON DUPLICATE KEY UPDATE
          timestamp = VALUES(timestamp),
          directory = directory OR VALUES(directory)
      """

            try:
                cursor.executemany(query, path_info_values)
            except MySQLdb.IntegrityError as error:
                # An integrity error on this first write is reported to the
                # caller as an unknown client.
                raise db.UnknownClientError(client_id=client_id, cause=error)

        if parent_path_info_values:
            query = """
        INSERT INTO client_paths(client_id, path_type, path_id, path,
                                 directory, depth)
        VALUES (%s, %s, %s, %s, TRUE, %s)
        ON DUPLICATE KEY UPDATE
          directory = TRUE,
          timestamp = NOW(6)
      """
            cursor.executemany(query, parent_path_info_values)

        if stat_entry_values:
            query = """
        INSERT INTO client_path_stat_entries(client_id, path_type, path_id,
                                             timestamp,
                                             stat_entry)
        VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s)
      """
            cursor.executemany(query, stat_entry_values)

            condition = "(client_id = %s AND path_type = %s AND path_id = %s)"

            # One statement updates all affected path rows: the first parameter
            # is the timestamp, followed by three key values per condition.
            query = """
        UPDATE client_paths
        FORCE INDEX (PRIMARY)
        SET last_stat_entry_timestamp = FROM_UNIXTIME(%s)
        WHERE {}
      """.format(" OR ".join([condition] * len(stat_entry_values)))

            params = [now_timestamp] + stat_entry_keys
            cursor.execute(query, params)

        if hash_entry_values:
            query = """
        INSERT INTO client_path_hash_entries(client_id, path_type, path_id,
                                             timestamp,
                                             hash_entry, sha256)
        VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s, %s)
      """
            cursor.executemany(query, hash_entry_values)

            condition = "(client_id = %s AND path_type = %s AND path_id = %s)"

            # Same shape as the stat entry update above.
            query = """
        UPDATE client_paths
        FORCE INDEX (PRIMARY)
        SET last_hash_entry_timestamp = FROM_UNIXTIME(%s)
        WHERE {}
      """.format(" OR ".join([condition] * len(hash_entry_values)))

            params = [now_timestamp] + hash_entry_keys
            cursor.execute(query, params)