def WriteClientCrashInfo(self, client_id, crash_info, cursor=None):
  """Writes a new client crash record.

  Args:
    client_id: A GRR client id string, e.g. "C.ea3b2b71840d6fa7".
    crash_info: An rdf_client.ClientCrash instance to store.
    cursor: The MySQL cursor used to execute the queries.

  Raises:
    db.UnknownClientError: if the client row does not exist (the INSERT
      violates the foreign key on client_id).
  """
  # Capture a single NOW(6) value in a session variable so the history row
  # and clients.last_crash_timestamp receive the exact same timestamp.
  cursor.execute("SET @now = NOW(6)")
  params = {
      "client_id": db_utils.ClientIDToInt(client_id),
      "crash_info": crash_info.SerializeToBytes(),
  }
  try:
    cursor.execute(
        """
        INSERT INTO client_crash_history (client_id, timestamp, crash_info)
        VALUES (%(client_id)s, @now, %(crash_info)s)
        """, params)
    cursor.execute(
        """
        UPDATE clients
        SET last_crash_timestamp = @now
        WHERE client_id = %(client_id)s
        """, params)
  except MySQLdb.IntegrityError as e:
    raise db.UnknownClientError(client_id, cause=e)
def MultiReadClientMetadata(self, client_ids, cursor=None):
  """Reads ClientMetadata records for a list of clients."""
  int_ids = [db_utils.ClientIDToInt(client_id) for client_id in client_ids]
  placeholders = ", ".join(["%s"] * len(int_ids))
  query = ("SELECT client_id, fleetspeak_enabled, certificate, "
           "UNIX_TIMESTAMP(last_ping), "
           "UNIX_TIMESTAMP(last_clock), last_ip, "
           "UNIX_TIMESTAMP(last_foreman), UNIX_TIMESTAMP(first_seen), "
           "UNIX_TIMESTAMP(last_crash_timestamp), "
           "UNIX_TIMESTAMP(last_startup_timestamp) FROM "
           "clients WHERE client_id IN ({})").format(placeholders)
  cursor.execute(query, int_ids)
  result = {}
  for row in cursor.fetchall():
    (cid, fleetspeak_enabled, certificate, last_ping, last_clock, last_ip,
     last_foreman, first_seen, last_crash, last_startup) = row
    metadata = rdf_objects.ClientMetadata(
        certificate=certificate,
        fleetspeak_enabled=fleetspeak_enabled,
        first_seen=mysql_utils.TimestampToRDFDatetime(first_seen),
        ping=mysql_utils.TimestampToRDFDatetime(last_ping),
        clock=mysql_utils.TimestampToRDFDatetime(last_clock),
        ip=mysql_utils.StringToRDFProto(rdf_client_network.NetworkAddress,
                                        last_ip),
        last_foreman_time=mysql_utils.TimestampToRDFDatetime(last_foreman),
        startup_info_timestamp=mysql_utils.TimestampToRDFDatetime(
            last_startup),
        last_crash_timestamp=mysql_utils.TimestampToRDFDatetime(last_crash))
    result[db_utils.IntToClientID(cid)] = metadata
  return result
def ReadClientStartupInfoHistory(self, client_id, timerange=None, cursor=None):
  """Reads the full startup history for a particular client."""
  query = ("SELECT startup_info, UNIX_TIMESTAMP(timestamp) "
           "FROM client_startup_history "
           "WHERE client_id=%s ")
  args = [db_utils.ClientIDToInt(client_id)]
  if timerange:
    time_from, time_to = timerange  # pylint: disable=unpacking-non-sequence
    if time_from is not None:
      query += "AND timestamp >= FROM_UNIXTIME(%s) "
      args.append(mysql_utils.RDFDatetimeToTimestamp(time_from))
    if time_to is not None:
      query += "AND timestamp <= FROM_UNIXTIME(%s) "
      args.append(mysql_utils.RDFDatetimeToTimestamp(time_to))
  query += "ORDER BY timestamp DESC "
  cursor.execute(query, args)
  history = []
  for serialized, ts in cursor.fetchall():
    entry = rdf_client.StartupInfo.FromSerializedBytes(serialized)
    entry.timestamp = mysql_utils.TimestampToRDFDatetime(ts)
    history.append(entry)
  return history
def RemoveClientKeyword(self, client_id, keyword, cursor=None):
  """Removes the association of a particular client to a keyword."""
  args = [db_utils.ClientIDToInt(client_id), mysql_utils.Hash(keyword)]
  cursor.execute(
      "DELETE FROM client_keywords "
      "WHERE client_id = %s AND keyword_hash = %s", args)
def MultiReadClientSnapshot(self, client_ids, cursor=None):
  """Reads the latest client snapshots for a list of clients."""
  placeholders = ", ".join(["%s"] * len(client_ids))
  query = (
      "SELECT h.client_id, h.client_snapshot, UNIX_TIMESTAMP(h.timestamp),"
      " s.startup_info "
      "FROM clients as c FORCE INDEX (PRIMARY), "
      "client_snapshot_history as h FORCE INDEX (PRIMARY), "
      "client_startup_history as s FORCE INDEX (PRIMARY) "
      "WHERE h.client_id = c.client_id "
      "AND s.client_id = c.client_id "
      "AND h.timestamp = c.last_snapshot_timestamp "
      "AND s.timestamp = c.last_startup_timestamp "
      "AND c.client_id IN ({})").format(placeholders)
  # Clients with no snapshot simply keep their None entry.
  results = {cid: None for cid in client_ids}
  cursor.execute(query, [db_utils.ClientIDToInt(cid) for cid in client_ids])
  for cid, snapshot_bytes, ts, startup_bytes in cursor.fetchall():
    snapshot = mysql_utils.StringToRDFProto(rdf_objects.ClientSnapshot,
                                            snapshot_bytes)
    snapshot.startup_info = mysql_utils.StringToRDFProto(
        rdf_client.StartupInfo, startup_bytes)
    snapshot.timestamp = mysql_utils.TimestampToRDFDatetime(ts)
    results[db_utils.IntToClientID(cid)] = snapshot
  return results
def WriteClientStats(self,
                     client_id: Text,
                     stats: rdf_client_stats.ClientStats,
                     cursor=None) -> None:
  """Stores a ClientStats instance.

  Args:
    client_id: A GRR client id string.
    stats: The rdf_client_stats.ClientStats instance to store. If its
      timestamp is unset it is filled in with the current time.
    cursor: The MySQL cursor to act on.

  Raises:
    db.UnknownClientError: if the client is not known by the database.
  """
  # NOTE(review): this mutates the caller's `stats` object when the timestamp
  # is unset — confirm callers do not rely on it staying unset.
  if stats.timestamp is None:
    stats.timestamp = rdfvalue.RDFDatetime.Now()
  try:
    cursor.execute(
        """
        INSERT INTO client_stats (client_id, payload, timestamp)
        VALUES (%s, %s, FROM_UNIXTIME(%s))
        ON DUPLICATE KEY UPDATE payload=VALUES(payload)
        """, [
            db_utils.ClientIDToInt(client_id),
            stats.SerializeToBytes(),
            mysql_utils.RDFDatetimeToTimestamp(stats.timestamp)
        ])
  except MySQLdb.IntegrityError as e:
    # NO_REFERENCED_ROW_2: the foreign key on client_id failed, i.e. there is
    # no such client row.
    if e.args[0] == mysql_error_constants.NO_REFERENCED_ROW_2:
      raise db.UnknownClientError(client_id, cause=e)
    else:
      raise
def WriteClientSnapshotHistory(self, clients, cursor=None):
  """Writes the full history for a particular client.

  Args:
    clients: A list of client snapshots, all belonging to the same client.
    cursor: The MySQL cursor to act on.

  Raises:
    db.UnknownClientError: if the client is not known by the database.
  """
  client_id = clients[0].client_id
  latest_timestamp = max(client.timestamp for client in clients)
  base_params = {
      "client_id": db_utils.ClientIDToInt(client_id),
      "latest_timestamp":
          mysql_utils.RDFDatetimeToTimestamp(latest_timestamp),
  }
  try:
    for client in clients:
      # Startup info lives in its own table, so it is detached from the
      # snapshot before serialization and restored afterwards.
      startup_info = client.startup_info
      client.startup_info = None
      params = base_params.copy()
      params.update({
          "timestamp": mysql_utils.RDFDatetimeToTimestamp(client.timestamp),
          # SerializeToBytes for consistency with the other writers in this
          # file (e.g. WriteClientSnapshot, WriteClientCrashInfo).
          "client_snapshot": client.SerializeToBytes(),
          "startup_info": startup_info.SerializeToBytes(),
      })
      cursor.execute(
          """
          INSERT INTO client_snapshot_history
            (client_id, timestamp, client_snapshot)
          VALUES (%(client_id)s, FROM_UNIXTIME(%(timestamp)s),
                  %(client_snapshot)s)
          """, params)
      cursor.execute(
          """
          INSERT INTO client_startup_history
            (client_id, timestamp, startup_info)
          VALUES (%(client_id)s, FROM_UNIXTIME(%(timestamp)s),
                  %(startup_info)s)
          """, params)
      client.startup_info = startup_info
    # Only move the cached "latest" pointers forward, never backwards.
    cursor.execute(
        """
        UPDATE clients
        SET last_snapshot_timestamp = FROM_UNIXTIME(%(latest_timestamp)s)
        WHERE client_id = %(client_id)s
          AND (last_snapshot_timestamp IS NULL OR
               last_snapshot_timestamp < FROM_UNIXTIME(%(latest_timestamp)s))
        """, base_params)
    cursor.execute(
        """
        UPDATE clients
        SET last_startup_timestamp = FROM_UNIXTIME(%(latest_timestamp)s)
        WHERE client_id = %(client_id)s
          AND (last_startup_timestamp IS NULL OR
               last_startup_timestamp < FROM_UNIXTIME(%(latest_timestamp)s))
        """, base_params)
  except MySQLdb.IntegrityError as error:
    raise db.UnknownClientError(client_id, cause=error)
def RemoveClientLabels(self, client_id, owner, labels, cursor=None):
  """Removes a list of user labels from a given client."""
  placeholders = ", ".join(["%s"] * len(labels))
  query = ("DELETE FROM client_labels "
           "WHERE client_id = %s AND owner_username_hash = %s "
           "AND label IN ({})").format(placeholders)
  args = [db_utils.ClientIDToInt(client_id), mysql_utils.Hash(owner)]
  args += labels
  cursor.execute(query, args)
def WriteClientSnapshotHistory(self, clients, cursor=None):
  """Writes the full history for a particular client.

  NOTE(review): WriteClientSnapshotHistory is defined twice in this file and
  this later definition shadows the earlier one — confirm which is intended
  and delete the duplicate.

  Args:
    clients: A list of client snapshots, all belonging to the same client.
    cursor: The MySQL cursor to act on.

  Raises:
    db.UnknownClientError: if the client is not known by the database.
  """
  # The original implementation concatenated every INSERT/UPDATE into one
  # ";"-separated string and issued a single cursor.execute(). MySQLdb only
  # accepts multi-statement queries when the connection is created with
  # CLIENT_MULTI_STATEMENTS (and then each result set must be drained), so
  # the statements are executed one at a time here.
  client_id = clients[0].client_id
  latest_timestamp = max(client.timestamp for client in clients)
  base_params = {
      "client_id": db_utils.ClientIDToInt(client_id),
      "latest_timestamp":
          mysql_utils.RDFDatetimeToTimestamp(latest_timestamp),
  }
  try:
    for client in clients:
      # Startup info lives in its own table; detach before serializing and
      # restore afterwards (finally guards against a failing INSERT).
      startup_info = client.startup_info
      client.startup_info = None
      try:
        params = dict(base_params)
        params.update({
            "timestamp":
                mysql_utils.RDFDatetimeToTimestamp(client.timestamp),
            # SerializeToBytes for consistency with the rest of this file.
            "client_snapshot": client.SerializeToBytes(),
            "startup_info": startup_info.SerializeToBytes(),
        })
        cursor.execute(
            """
            INSERT INTO client_snapshot_history
              (client_id, timestamp, client_snapshot)
            VALUES (%(client_id)s, FROM_UNIXTIME(%(timestamp)s),
                    %(client_snapshot)s)
            """, params)
        cursor.execute(
            """
            INSERT INTO client_startup_history
              (client_id, timestamp, startup_info)
            VALUES (%(client_id)s, FROM_UNIXTIME(%(timestamp)s),
                    %(startup_info)s)
            """, params)
      finally:
        client.startup_info = startup_info
    # Only advance the cached "latest" pointers, never move them backwards.
    cursor.execute(
        """
        UPDATE clients
        SET last_snapshot_timestamp = FROM_UNIXTIME(%(latest_timestamp)s)
        WHERE client_id = %(client_id)s
          AND (last_snapshot_timestamp IS NULL OR
               last_snapshot_timestamp < FROM_UNIXTIME(%(latest_timestamp)s))
        """, base_params)
    cursor.execute(
        """
        UPDATE clients
        SET last_startup_timestamp = FROM_UNIXTIME(%(latest_timestamp)s)
        WHERE client_id = %(client_id)s
          AND (last_startup_timestamp IS NULL OR
               last_startup_timestamp < FROM_UNIXTIME(%(latest_timestamp)s))
        """, base_params)
  except MySQLdb.IntegrityError as error:
    raise db.UnknownClientError(client_id, cause=error)
def DeleteClient(self, client_id, cursor=None):
  """Deletes a client with all associated metadata."""
  int_id = db_utils.ClientIDToInt(client_id)
  cursor.execute("SELECT COUNT(*) FROM clients WHERE client_id = %s",
                 [int_id])
  if cursor.fetchone()[0] == 0:
    raise db.UnknownClientError(client_id)
  # Clean out foreign keys first.
  cursor.execute(
      """
      UPDATE clients SET
        last_crash_timestamp = NULL,
        last_snapshot_timestamp = NULL,
        last_startup_timestamp = NULL
      WHERE client_id = %s""", [int_id])
  cursor.execute("DELETE FROM clients WHERE client_id = %s", [int_id])
def WriteClientMetadata(self,
                        client_id,
                        certificate=None,
                        fleetspeak_enabled=None,
                        first_seen=None,
                        last_ping=None,
                        last_clock=None,
                        last_ip=None,
                        last_foreman=None,
                        cursor=None):
  """Write metadata about the client.

  Performs an upsert: only the columns whose argument is provided are
  inserted, and on duplicate key only those same columns are updated.

  Args:
    client_id: A GRR client id string.
    certificate: If set, a certificate RDF object (serialized to bytes).
    fleetspeak_enabled: If set, a boolean flag for the client.
    first_seen: If set, an RDFDatetime of the first contact.
    last_ping: If set, an RDFDatetime of the latest ping.
    last_clock: If set, an RDFDatetime of the client clock.
    last_ip: If set, a network-address RDF object (serialized to bytes).
    last_foreman: If set, an RDFDatetime of the latest foreman check.
    cursor: The MySQL cursor to act on.
  """
  placeholders = []
  # OrderedDict keeps the column list aligned with the placeholder list.
  values = collections.OrderedDict()
  placeholders.append("%(client_id)s")
  values["client_id"] = db_utils.ClientIDToInt(client_id)
  # NOTE(review): some arguments are gated on truthiness (`if certificate:`,
  # `if last_clock:`, `if last_ip:`, `if last_foreman:`) while others use
  # `is not None` — a falsy-but-set value would be skipped by the former.
  # Confirm this asymmetry is intentional.
  if certificate:
    placeholders.append("%(certificate)s")
    values["certificate"] = certificate.SerializeToBytes()
  if fleetspeak_enabled is not None:
    placeholders.append("%(fleetspeak_enabled)s")
    values["fleetspeak_enabled"] = fleetspeak_enabled
  if first_seen is not None:
    placeholders.append("FROM_UNIXTIME(%(first_seen)s)")
    values["first_seen"] = mysql_utils.RDFDatetimeToTimestamp(first_seen)
  if last_ping is not None:
    placeholders.append("FROM_UNIXTIME(%(last_ping)s)")
    values["last_ping"] = mysql_utils.RDFDatetimeToTimestamp(last_ping)
  if last_clock:
    placeholders.append("FROM_UNIXTIME(%(last_clock)s)")
    values["last_clock"] = mysql_utils.RDFDatetimeToTimestamp(last_clock)
  if last_ip:
    placeholders.append("%(last_ip)s")
    values["last_ip"] = last_ip.SerializeToBytes()
  if last_foreman:
    placeholders.append("FROM_UNIXTIME(%(last_foreman)s)")
    values["last_foreman"] = mysql_utils.RDFDatetimeToTimestamp(last_foreman)
  # On conflict, overwrite exactly the columns provided in this call.
  updates = []
  for column in iterkeys(values):
    updates.append("{column} = VALUES({column})".format(column=column))
  query = """
      INSERT INTO clients ({columns})
      VALUES ({placeholders})
      ON DUPLICATE KEY UPDATE {updates}
  """.format(
      columns=", ".join(iterkeys(values)),
      placeholders=", ".join(placeholders),
      updates=", ".join(updates))
  cursor.execute(query, values)
def WriteClientSnapshot(self, snapshot, cursor=None):
  """Write new client snapshot.

  Appends the snapshot and its startup info to their history tables and
  refreshes the denormalized columns on the clients row.

  Args:
    snapshot: An rdf_objects.ClientSnapshot instance.
    cursor: The MySQL cursor to act on.

  Raises:
    db.UnknownClientError: if the client is not known by the database.
    mysql_utils.RetryableError: on a duplicate-entry race so the caller may
      retry the transaction.
  """
  insert_history_query = (
      "INSERT INTO client_snapshot_history(client_id, timestamp, "
      "client_snapshot) VALUES (%s, FROM_UNIXTIME(%s), %s)")
  insert_startup_query = (
      "INSERT INTO client_startup_history(client_id, timestamp, "
      "startup_info) VALUES(%s, FROM_UNIXTIME(%s), %s)")
  # A single timestamp is used for both history rows and the clients row.
  now = rdfvalue.RDFDatetime.Now()
  current_timestamp = mysql_utils.RDFDatetimeToTimestamp(now)
  client_info = {
      "last_snapshot_timestamp": current_timestamp,
      "last_startup_timestamp": current_timestamp,
      "last_version_string": snapshot.GetGRRVersionString(),
      "last_platform": snapshot.knowledge_base.os,
      "last_platform_release": snapshot.Uname(),
  }
  update_clauses = [
      "last_snapshot_timestamp = FROM_UNIXTIME(%(last_snapshot_timestamp)s)",
      "last_startup_timestamp = FROM_UNIXTIME(%(last_startup_timestamp)s)",
      "last_version_string = %(last_version_string)s",
      "last_platform = %(last_platform)s",
      "last_platform_release = %(last_platform_release)s",
  ]
  update_query = (
      "UPDATE clients SET {} WHERE client_id = %(client_id)s".format(
          ", ".join(update_clauses)))
  int_client_id = db_utils.ClientIDToInt(snapshot.client_id)
  client_info["client_id"] = int_client_id
  # Startup info is stored in its own table, so it is detached from the
  # snapshot before serialization and restored in the `finally` below.
  startup_info = snapshot.startup_info
  snapshot.startup_info = None
  try:
    cursor.execute(insert_history_query,
                   (int_client_id, current_timestamp,
                    snapshot.SerializeToBytes()))
    cursor.execute(insert_startup_query,
                   (int_client_id, current_timestamp,
                    startup_info.SerializeToBytes()))
    cursor.execute(update_query, client_info)
  except MySQLdb.IntegrityError as e:
    if e.args and e.args[0] == mysql_error_constants.NO_REFERENCED_ROW_2:
      # Foreign key failure: the client row does not exist.
      raise db.UnknownClientError(snapshot.client_id, cause=e)
    elif e.args and e.args[0] == mysql_error_constants.DUP_ENTRY:
      # Another writer used the same timestamp; the caller may retry.
      raise mysql_utils.RetryableError(cause=e)
    else:
      raise
  finally:
    snapshot.startup_info = startup_info
def ReadClientCrashInfoHistory(self, client_id, cursor=None):
  """Reads the full crash history for a particular client."""
  cursor.execute(
      "SELECT UNIX_TIMESTAMP(timestamp), crash_info "
      "FROM client_crash_history WHERE "
      "client_crash_history.client_id = %s "
      "ORDER BY timestamp DESC", [db_utils.ClientIDToInt(client_id)])
  history = []
  for ts, serialized in cursor.fetchall():
    crash = rdf_client.ClientCrash.FromSerializedBytes(serialized)
    crash.timestamp = mysql_utils.TimestampToRDFDatetime(ts)
    history.append(crash)
  return history
def AddClientLabels(self, client_id, owner, labels, cursor=None):
  """Attaches a list of user labels to a client."""
  unique_labels = set(labels)
  int_id = db_utils.ClientIDToInt(client_id)
  owner_hash = mysql_utils.Hash(owner)
  args = []
  for label in unique_labels:
    args.extend([int_id, owner_hash, owner, label])
  query = """
      INSERT IGNORE INTO client_labels
        (client_id, owner_username_hash, owner_username, label)
      VALUES {}
  """.format(", ".join(["(%s, %s, %s, %s)"] * len(unique_labels)))
  try:
    cursor.execute(query, args)
  except MySQLdb.IntegrityError as e:
    raise db.UnknownClientError(client_id, cause=e)
def AddClientKeywords(self, client_id, keywords, cursor=None):
  """Associates the provided keywords with the client."""
  unique_keywords = set(keywords)
  int_id = db_utils.ClientIDToInt(client_id)
  args = []
  for kw in unique_keywords:
    args.extend([int_id, mysql_utils.Hash(kw), kw])
  query = """
      INSERT INTO client_keywords (client_id, keyword_hash, keyword)
      VALUES {}
      ON DUPLICATE KEY UPDATE timestamp = NOW(6)
  """.format(", ".join(["(%s, %s, %s)"] * len(unique_keywords)))
  try:
    cursor.execute(query, args)
  except MySQLdb.IntegrityError as e:
    raise db.UnknownClientError(client_id, cause=e)
def ReadClientCrashInfo(self, client_id, cursor=None):
  """Reads the latest client crash record for a single client."""
  cursor.execute(
      "SELECT UNIX_TIMESTAMP(timestamp), crash_info "
      "FROM clients, client_crash_history WHERE "
      "clients.client_id = client_crash_history.client_id AND "
      "clients.last_crash_timestamp = client_crash_history.timestamp AND "
      "clients.client_id = %s", [db_utils.ClientIDToInt(client_id)])
  row = cursor.fetchone()
  if row is None:
    return None
  ts, serialized = row
  crash = rdf_client.ClientCrash.FromSerializedBytes(serialized)
  crash.timestamp = mysql_utils.TimestampToRDFDatetime(ts)
  return crash
def ReadClientStartupInfo(self, client_id, cursor=None):
  """Reads the latest client startup record for a single client."""
  cursor.execute(
      "SELECT startup_info, UNIX_TIMESTAMP(timestamp) "
      "FROM clients, client_startup_history "
      "WHERE clients.last_startup_timestamp=client_startup_history.timestamp "
      "AND clients.client_id=client_startup_history.client_id "
      "AND clients.client_id=%s", [db_utils.ClientIDToInt(client_id)])
  row = cursor.fetchone()
  if row is None:
    return None
  serialized, ts = row
  info = rdf_client.StartupInfo.FromSerializedBytes(serialized)
  info.timestamp = mysql_utils.TimestampToRDFDatetime(ts)
  return info
def MultiReadClientLabels(self, client_ids, cursor=None):
  """Reads the user labels for a list of clients."""
  placeholders = ", ".join(["%s"] * len(client_ids))
  query = ("SELECT client_id, owner_username, label "
           "FROM client_labels "
           "WHERE client_id IN ({})").format(placeholders)
  labels_by_client = {client_id: [] for client_id in client_ids}
  cursor.execute(query, [db_utils.ClientIDToInt(cid) for cid in client_ids])
  for int_id, owner, label in cursor.fetchall():
    labels_by_client[db_utils.IntToClientID(int_id)].append(
        rdf_objects.ClientLabel(name=label, owner=owner))
  for labels in itervalues(labels_by_client):
    labels.sort(key=lambda label: (label.owner, label.name))
  return labels_by_client
def MultiReadClientFullInfo(self, client_ids, min_last_ping=None,
                            cursor=None):
  """Reads full client information for a list of clients.

  Args:
    client_ids: A list of GRR client id strings.
    min_last_ping: If set, only clients whose last ping is at or after this
      RDFDatetime are returned.
    cursor: The MySQL cursor to act on.

  Returns:
    A dict mapping client ids to full-info objects, as produced by
    _ResponseToClientsFullInfo.
  """
  if not client_ids:
    return {}
  # The label join produces one row per (client, label); the repeated
  # snapshot/startup columns are collapsed by _ResponseToClientsFullInfo.
  # NOTE(review): alias `s` joins client_startup_history on
  # c.last_snapshot_timestamp (startup info recorded with the latest
  # snapshot), while `s_last` joins on c.last_startup_timestamp (most recent
  # startup info) — confirm this asymmetry is intentional.
  query = (
      "SELECT "
      "c.client_id, c.fleetspeak_enabled, c.certificate, "
      "UNIX_TIMESTAMP(c.last_ping), UNIX_TIMESTAMP(c.last_clock), "
      "c.last_ip, UNIX_TIMESTAMP(c.last_foreman), "
      "UNIX_TIMESTAMP(c.first_seen), "
      "UNIX_TIMESTAMP(c.last_snapshot_timestamp), "
      "UNIX_TIMESTAMP(c.last_crash_timestamp), "
      "UNIX_TIMESTAMP(c.last_startup_timestamp), "
      "h.client_snapshot, s.startup_info, s_last.startup_info, "
      "l.owner_username, l.label "
      "FROM clients as c "
      "FORCE INDEX (PRIMARY) "
      "LEFT JOIN client_snapshot_history as h FORCE INDEX (PRIMARY) ON ( "
      "c.client_id = h.client_id AND "
      "h.timestamp = c.last_snapshot_timestamp) "
      "LEFT JOIN client_startup_history as s FORCE INDEX (PRIMARY) ON ( "
      "c.client_id = s.client_id AND "
      "s.timestamp = c.last_snapshot_timestamp) "
      "LEFT JOIN client_startup_history as s_last FORCE INDEX (PRIMARY) ON ( "
      "c.client_id = s_last.client_id "
      "AND s_last.timestamp = c.last_startup_timestamp) "
      "LEFT JOIN client_labels AS l FORCE INDEX (PRIMARY) "
      "ON (c.client_id = l.client_id) ")
  query += "WHERE c.client_id IN (%s) " % ", ".join(["%s"] * len(client_ids))
  values = [db_utils.ClientIDToInt(cid) for cid in client_ids]
  if min_last_ping is not None:
    query += "AND c.last_ping >= FROM_UNIXTIME(%s)"
    values.append(mysql_utils.RDFDatetimeToTimestamp(min_last_ping))
  cursor.execute(query, values)
  return dict(self._ResponseToClientsFullInfo(cursor.fetchall()))
def _ReadClientLastPings(self,
                         last_client_id,
                         count,
                         min_last_ping=None,
                         max_last_ping=None,
                         fleetspeak_enabled=None,
                         cursor=None):
  """Reads a page of last-ping timestamps for clients in the DB.

  Args:
    last_client_id: Paging token — only clients with an id strictly greater
      than this one are returned.
    count: Maximum number of rows to return.
    min_last_ping: If set, only clients with last_ping at or after this time.
    max_last_ping: If set, only clients with no last_ping or with last_ping
      at or before this time.
    fleetspeak_enabled: If set, filter on the fleetspeak_enabled flag
      (NULL counts as not enabled).
    cursor: The MySQL cursor to act on.

  Returns:
    A tuple (last processed client id or None, dict mapping client ids to
    last-ping RDFDatetimes).
  """
  where_filters = ["client_id > %s"]
  query_values = [db_utils.ClientIDToInt(last_client_id)]
  if min_last_ping is not None:
    where_filters.append("last_ping >= FROM_UNIXTIME(%s) ")
    query_values.append(mysql_utils.RDFDatetimeToTimestamp(min_last_ping))
  if max_last_ping is not None:
    # Clients that have never pinged also satisfy the upper bound.
    where_filters.append(
        "(last_ping IS NULL OR last_ping <= FROM_UNIXTIME(%s))")
    query_values.append(mysql_utils.RDFDatetimeToTimestamp(max_last_ping))
  if fleetspeak_enabled is not None:
    if fleetspeak_enabled:
      where_filters.append("fleetspeak_enabled IS TRUE")
    else:
      where_filters.append(
          "(fleetspeak_enabled IS NULL OR fleetspeak_enabled IS FALSE)")
  # Ordering by client_id makes the returned last_client_id usable as the
  # paging token for the next call.
  query = """
      SELECT client_id, UNIX_TIMESTAMP(last_ping)
      FROM clients
      WHERE {}
      ORDER BY client_id
      LIMIT %s""".format(" AND ".join(where_filters))
  cursor.execute(query, query_values + [count])
  last_pings = {}
  last_client_id = None
  for int_client_id, last_ping in cursor.fetchall():
    last_client_id = db_utils.IntToClientID(int_client_id)
    last_pings[last_client_id] = mysql_utils.TimestampToRDFDatetime(last_ping)
  return last_client_id, last_pings
def ReadClientSnapshotHistory(self, client_id, timerange=None, cursor=None):
  """Reads the full history for a particular client."""
  query = ("SELECT sn.client_snapshot, st.startup_info, "
           " UNIX_TIMESTAMP(sn.timestamp) FROM "
           "client_snapshot_history AS sn, "
           "client_startup_history AS st WHERE "
           "sn.client_id = st.client_id AND "
           "sn.timestamp = st.timestamp AND "
           "sn.client_id=%s ")
  args = [db_utils.ClientIDToInt(client_id)]
  if timerange:
    time_from, time_to = timerange  # pylint: disable=unpacking-non-sequence
    if time_from is not None:
      query += "AND sn.timestamp >= FROM_UNIXTIME(%s) "
      args.append(mysql_utils.RDFDatetimeToTimestamp(time_from))
    if time_to is not None:
      query += "AND sn.timestamp <= FROM_UNIXTIME(%s) "
      args.append(mysql_utils.RDFDatetimeToTimestamp(time_to))
  query += "ORDER BY sn.timestamp DESC"
  cursor.execute(query, args)
  history = []
  for snapshot_bytes, startup_bytes, ts in cursor.fetchall():
    snapshot = rdf_objects.ClientSnapshot.FromSerializedBytes(snapshot_bytes)
    snapshot.startup_info = rdf_client.StartupInfo.FromSerializedBytes(
        startup_bytes)
    snapshot.timestamp = mysql_utils.TimestampToRDFDatetime(ts)
    history.append(snapshot)
  return history
def ReadClientStats(self, client_id, min_timestamp, max_timestamp,
                    cursor=None):
  """Reads ClientStats for a given client and time range."""
  params = [
      db_utils.ClientIDToInt(client_id),
      mysql_utils.RDFDatetimeToTimestamp(min_timestamp),
      mysql_utils.RDFDatetimeToTimestamp(max_timestamp),
  ]
  cursor.execute(
      """
      SELECT payload FROM client_stats
      WHERE client_id = %s AND
            timestamp BETWEEN FROM_UNIXTIME(%s) AND FROM_UNIXTIME(%s)
      ORDER BY timestamp ASC
      """, params)
  stats = []
  for (stats_bytes,) in cursor.fetchall():
    stats.append(rdf_client_stats.ClientStats.FromSerializedBytes(stats_bytes))
  return stats
def WriteClientStartupInfo(self, client_id, startup_info, cursor=None):
  """Writes a new client startup record.

  Args:
    client_id: A GRR client id string.
    startup_info: An rdf_client.StartupInfo instance to store.
    cursor: The MySQL cursor to act on.

  Raises:
    db.UnknownClientError: if the client is not known by the database.
  """
  # The original implementation sent all three statements as a single
  # ";"-separated string to one cursor.execute() call. MySQLdb only accepts
  # multi-statement queries when the connection is created with
  # CLIENT_MULTI_STATEMENTS, so the statements are executed one at a time
  # here, mirroring WriteClientCrashInfo.
  params = {
      "client_id": db_utils.ClientIDToInt(client_id),
      # SerializeToBytes for consistency with the other writers in this file.
      "startup_info": startup_info.SerializeToBytes(),
  }
  try:
    # One shared NOW(6) value keeps the history row and the clients row in
    # sync.
    cursor.execute("SET @now = NOW(6)")
    cursor.execute(
        """
        INSERT INTO client_startup_history (client_id, timestamp,
                                            startup_info)
        VALUES (%(client_id)s, @now, %(startup_info)s)
        """, params)
    cursor.execute(
        """
        UPDATE clients
        SET last_startup_timestamp = @now
        WHERE client_id = %(client_id)s
        """, params)
  except MySQLdb.IntegrityError as e:
    raise db.UnknownClientError(client_id, cause=e)
def ReadLatestPathInfosWithHashBlobReferences(self,
                                              client_paths,
                                              max_timestamp=None,
                                              cursor=None):
  """Returns PathInfos that have corresponding HashBlobReferences.

  Args:
    client_paths: db.ClientPath instances to look up.
    max_timestamp: If set, only hash entries at or before this RDFDatetime
      are considered.
    cursor: The MySQL cursor to act on.

  Returns:
    A dict mapping each requested client path to its latest
    rdf_objects.PathInfo that has a hash blob reference, or None if there is
    no such entry.
  """
  path_infos = {client_path: None for client_path in client_paths}
  # Map path ids back to the components they were derived from so the result
  # rows can be turned into PathInfo objects.
  path_id_components = {}
  for client_path in client_paths:
    path_id_components[client_path.path_id] = client_path.components
  params = []
  query = """
      SELECT t.client_id, t.path_type, t.path_id,
             UNIX_TIMESTAMP(t.timestamp),
             s.stat_entry, h.hash_entry
      FROM (SELECT h.client_id, h.path_type, h.path_id,
                   MAX(h.timestamp) AS timestamp
            FROM client_path_hash_entries AS h
            INNER JOIN hash_blob_references AS b
            ON b.hash_id = h.sha256
            WHERE {conditions}
            GROUP BY client_id, path_type, path_id) AS t
      LEFT JOIN client_path_stat_entries AS s
      ON s.client_id = t.client_id
         AND s.path_type = t.path_type
         AND s.path_id = t.path_id
         AND s.timestamp = t.timestamp
      LEFT JOIN client_path_hash_entries AS h
      ON h.client_id = t.client_id
         AND h.path_type = t.path_type
         AND h.path_id = t.path_id
         AND h.timestamp = t.timestamp
  """
  path_conditions = []
  for client_path in client_paths:
    path_conditions.append("""
        (client_id = %s AND path_type = %s AND path_id = %s)
    """)
    params.append(db_utils.ClientIDToInt(client_path.client_id))
    params.append(int(client_path.path_type))
    params.append(client_path.path_id.AsBytes())
  conditions = " OR ".join(path_conditions)
  if max_timestamp is not None:
    conditions = "({}) AND UNIX_TIMESTAMP(timestamp) <= %s".format(conditions)
    params.append(mysql_utils.RDFDatetimeToTimestamp(max_timestamp))
  cursor.execute(query.format(conditions=conditions), params)
  for row in cursor.fetchall():
    # pyformat: disable
    (client_id, path_type, path_id_bytes, timestamp,
     stat_entry_bytes, hash_entry_bytes) = row
    # pyformat: enable
    path_id = rdf_objects.PathID.FromBytes(path_id_bytes)
    components = path_id_components[path_id]
    if stat_entry_bytes is not None:
      # FromSerializedBytes for consistency with the rest of this file
      # (FromSerializedString is the deprecated spelling).
      stat_entry = rdf_client_fs.StatEntry.FromSerializedBytes(
          stat_entry_bytes)
    else:
      stat_entry = None
    # The inner join on hash_blob_references guarantees that hash_entry_bytes
    # is not NULL here.
    hash_entry = rdf_crypto.Hash.FromSerializedBytes(hash_entry_bytes)
    client_path = db.ClientPath(
        client_id=db_utils.IntToClientID(client_id),
        path_type=path_type,
        components=components)
    path_info = rdf_objects.PathInfo(
        path_type=path_type,
        components=components,
        stat_entry=stat_entry,
        hash_entry=hash_entry,
        timestamp=mysql_utils.TimestampToRDFDatetime(timestamp))
    path_infos[client_path] = path_info
  return path_infos
def ReadPathInfosHistories(self, client_id, path_type, components_list,
                           cursor=None):
  """Reads a collection of hash and stat entries for given paths.

  Args:
    client_id: A GRR client id string.
    path_type: The path type of all requested paths.
    components_list: A list of path-components tuples to look up.
    cursor: The MySQL cursor to act on.

  Returns:
    A dict mapping each components tuple to a timestamp-sorted list of
    rdf_objects.PathInfo instances.
  """
  # MySQL does not handle well empty `IN` clauses so we guard against that.
  if not components_list:
    return {}
  path_infos = {components: [] for components in components_list}
  path_id_components = {}
  for components in components_list:
    path_id = rdf_objects.PathID.FromComponents(components)
    path_id_components[path_id] = components
  # MySQL does not support full outer joins, so we emulate them with a union.
  query = """
      SELECT s.path_id, s.stat_entry, UNIX_TIMESTAMP(s.timestamp),
             h.path_id, h.hash_entry, UNIX_TIMESTAMP(h.timestamp)
      FROM client_path_stat_entries AS s
      LEFT JOIN client_path_hash_entries AS h
      ON s.client_id = h.client_id
         AND s.path_type = h.path_type
         AND s.path_id = h.path_id
         AND s.timestamp = h.timestamp
      WHERE s.client_id = %(client_id)s
        AND s.path_type = %(path_type)s
        AND s.path_id IN %(path_ids)s
      UNION
      SELECT s.path_id, s.stat_entry, UNIX_TIMESTAMP(s.timestamp),
             h.path_id, h.hash_entry, UNIX_TIMESTAMP(h.timestamp)
      FROM client_path_hash_entries AS h
      LEFT JOIN client_path_stat_entries AS s
      ON h.client_id = s.client_id
         AND h.path_type = s.path_type
         AND h.path_id = s.path_id
         AND h.timestamp = s.timestamp
      WHERE h.client_id = %(client_id)s
        AND h.path_type = %(path_type)s
        AND h.path_id IN %(path_ids)s
  """
  params = {
      "client_id": db_utils.ClientIDToInt(client_id),
      "path_type": int(path_type),
      "path_ids": [path_id.AsBytes() for path_id in path_id_components]
  }
  cursor.execute(query, params)
  for row in cursor.fetchall():
    # pyformat: disable
    (stat_entry_path_id_bytes, stat_entry_bytes, stat_entry_timestamp,
     hash_entry_path_id_bytes, hash_entry_bytes, hash_entry_timestamp) = row
    # pyformat: enable
    # At least one side of the emulated outer join is non-NULL in every row.
    path_id_bytes = stat_entry_path_id_bytes or hash_entry_path_id_bytes
    path_id = rdf_objects.PathID.FromBytes(path_id_bytes)
    components = path_id_components[path_id]
    timestamp = stat_entry_timestamp or hash_entry_timestamp
    if stat_entry_bytes is not None:
      # FromSerializedBytes for consistency with the rest of this file
      # (FromSerializedString is the deprecated spelling).
      stat_entry = rdf_client_fs.StatEntry.FromSerializedBytes(
          stat_entry_bytes)
    else:
      stat_entry = None
    if hash_entry_bytes is not None:
      hash_entry = rdf_crypto.Hash.FromSerializedBytes(hash_entry_bytes)
    else:
      hash_entry = None
    path_info = rdf_objects.PathInfo(
        path_type=path_type,
        components=components,
        stat_entry=stat_entry,
        hash_entry=hash_entry,
        timestamp=mysql_utils.TimestampToRDFDatetime(timestamp))
    path_infos[components].append(path_info)
  for components in components_list:
    path_infos[components].sort(key=lambda path_info: path_info.timestamp)
  return path_infos
def ListDescendentPathInfos(self,
                            client_id,
                            path_type,
                            components,
                            timestamp=None,
                            max_depth=None,
                            cursor=None):
  """Lists path info records that correspond to descendants of given path.

  Args:
    client_id: A GRR client id string.
    path_type: The path type of the root path.
    components: Path components of the root path; only strict descendants
      are returned.
    timestamp: If set, return the state of each path at or before this
      RDFDatetime instead of the latest state.
    max_depth: If set, limit results to paths at most this many components
      below the root.
    cursor: The MySQL cursor to act on.

  Returns:
    A list of rdf_objects.PathInfo, sorted by components.
  """
  path_infos = []
  query = ""
  path = mysql_utils.ComponentsToPath(components)
  values = {
      "client_id": db_utils.ClientIDToInt(client_id),
      "path_type": int(path_type),
      # The root path is used in a LIKE pattern below, so wildcards in it
      # must be escaped.
      "path": db_utils.EscapeWildcards(path),
  }
  query += """
      SELECT path, directory, UNIX_TIMESTAMP(p.timestamp),
             stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
             hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths AS p
  """
  if timestamp is None:
    # Latest state: join directly on the cached last_*_timestamp columns.
    query += """
        LEFT JOIN client_path_stat_entries AS s ON
            (p.client_id = s.client_id AND
             p.path_type = s.path_type AND
             p.path_id = s.path_id AND
             p.last_stat_entry_timestamp = s.timestamp)
        LEFT JOIN client_path_hash_entries AS h ON
            (p.client_id = h.client_id AND
             p.path_type = h.path_type AND
             p.path_id = h.path_id AND
             p.last_hash_entry_timestamp = h.timestamp)
    """
    only_explicit = False
  else:
    # Historical state: pick, per path, the newest stat/hash entry at or
    # before the requested timestamp.
    query += """
        LEFT JOIN (SELECT sr.client_id, sr.path_type, sr.path_id,
                          sr.stat_entry
                   FROM client_path_stat_entries AS sr
                   INNER JOIN (SELECT client_id, path_type, path_id,
                                      MAX(timestamp) AS max_timestamp
                               FROM client_path_stat_entries
                               WHERE UNIX_TIMESTAMP(timestamp) <=
                                     %(timestamp)s
                               GROUP BY client_id, path_type, path_id) AS st
                   ON sr.client_id = st.client_id AND
                      sr.path_type = st.path_type AND
                      sr.path_id = st.path_id AND
                      sr.timestamp = st.max_timestamp) AS s ON
            (p.client_id = s.client_id AND
             p.path_type = s.path_type AND
             p.path_id = s.path_id)
        LEFT JOIN (SELECT hr.client_id, hr.path_type, hr.path_id,
                          hr.hash_entry
                   FROM client_path_hash_entries AS hr
                   INNER JOIN (SELECT client_id, path_type, path_id,
                                      MAX(timestamp) AS max_timestamp
                               FROM client_path_hash_entries
                               WHERE UNIX_TIMESTAMP(timestamp) <=
                                     %(timestamp)s
                               GROUP BY client_id, path_type, path_id) AS ht
                   ON hr.client_id = ht.client_id AND
                      hr.path_type = ht.path_type AND
                      hr.path_id = ht.path_id AND
                      hr.timestamp = ht.max_timestamp) AS h ON
            (p.client_id = h.client_id AND
             p.path_type = h.path_type AND
             p.path_id = h.path_id)
    """
    values["timestamp"] = mysql_utils.RDFDatetimeToTimestamp(timestamp)
    only_explicit = True
  query += """
      WHERE p.client_id = %(client_id)s
        AND p.path_type = %(path_type)s
        AND path LIKE concat(%(path)s, '/%%')
  """
  if max_depth is not None:
    query += """
        AND depth <= %(depth)s
    """
    values["depth"] = len(components) + max_depth
  cursor.execute(query, values)
  # Renamed from `datetime` to avoid shadowing the well-known module name.
  to_rdf_datetime = mysql_utils.TimestampToRDFDatetime
  for row in cursor.fetchall():
    # pyformat: disable
    (path, directory, timestamp, stat_entry_bytes, last_stat_entry_timestamp,
     hash_entry_bytes, last_hash_entry_timestamp) = row
    # pyformat: enable
    components = mysql_utils.PathToComponents(path)
    if stat_entry_bytes is not None:
      # FromSerializedBytes for consistency with the rest of this file
      # (FromSerializedString is the deprecated spelling).
      stat_entry = rdf_client_fs.StatEntry.FromSerializedBytes(
          stat_entry_bytes)
    else:
      stat_entry = None
    if hash_entry_bytes is not None:
      hash_entry = rdf_crypto.Hash.FromSerializedBytes(hash_entry_bytes)
    else:
      hash_entry = None
    path_info = rdf_objects.PathInfo(
        path_type=path_type,
        components=components,
        timestamp=to_rdf_datetime(timestamp),
        last_stat_entry_timestamp=to_rdf_datetime(last_stat_entry_timestamp),
        last_hash_entry_timestamp=to_rdf_datetime(last_hash_entry_timestamp),
        directory=directory,
        stat_entry=stat_entry,
        hash_entry=hash_entry)
    path_infos.append(path_info)
  path_infos.sort(key=lambda _: tuple(_.components))
  # For specific timestamp, we return information only about explicit paths
  # (paths that have associated stat or hash entry or have an ancestor that
  # is explicit).
  if not only_explicit:
    return path_infos
  explicit_path_infos = []
  has_explicit_ancestor = set()
  # This list is sorted according to the keys component, so by traversing it
  # in the reverse order we make sure that we process deeper paths first.
  for path_info in reversed(path_infos):
    components = tuple(path_info.components)
    if (path_info.HasField("stat_entry") or
        path_info.HasField("hash_entry") or
        components in has_explicit_ancestor):
      explicit_path_infos.append(path_info)
      has_explicit_ancestor.add(components[:-1])
  # Since we collected explicit paths in reverse order, we need to reverse it
  # again to conform to the interface.
  return list(reversed(explicit_path_infos))
def ReadPathInfo(self, client_id, path_type, components, timestamp=None,
                 cursor=None):
  """Retrieves a path info record for a given path.

  Args:
    client_id: A client id string for which the path is read.
    path_type: A path type (an `rdf_objects.PathInfo.PathType` value).
    components: A tuple of path components identifying the path.
    timestamp: An optional RDFDatetime; when given, the returned stat and
      hash entries are the newest ones recorded at or before this time.
    cursor: A database cursor (supplied by the connection decorator).

  Returns:
    An `rdf_objects.PathInfo` instance.

  Raises:
    db.UnknownPathError: If the path does not exist for the client.
  """
  if timestamp is None:
    # Without a timestamp the single-path read is just the batch read.
    path_infos = self.ReadPathInfos(client_id, path_type, [components])
    path_info = path_infos[components]
    if path_info is None:
      raise db.UnknownPathError(
          client_id=client_id, path_type=path_type, components=components)
    return path_info

  # The two LEFT JOIN subqueries select the newest stat/hash entry not
  # younger than the requested timestamp (ORDER BY ... DESC LIMIT 1).
  query = """
    SELECT directory, UNIX_TIMESTAMP(p.timestamp),
           stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
           hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths as p
 LEFT JOIN (SELECT client_id, path_type, path_id, stat_entry
              FROM client_path_stat_entries
             WHERE client_id = %(client_id)s
               AND path_type = %(path_type)s
               AND path_id = %(path_id)s
               AND UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
          ORDER BY timestamp DESC
             LIMIT 1) AS s
        ON p.client_id = s.client_id
       AND p.path_type = s.path_type
       AND p.path_id = s.path_id
 LEFT JOIN (SELECT client_id, path_type, path_id, hash_entry
              FROM client_path_hash_entries
             WHERE client_id = %(client_id)s
               AND path_type = %(path_type)s
               AND path_id = %(path_id)s
               AND UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
          ORDER BY timestamp DESC
             LIMIT 1) AS h
        ON p.client_id = h.client_id
       AND p.path_type = h.path_type
       AND p.path_id = h.path_id
     WHERE p.client_id = %(client_id)s
       AND p.path_type = %(path_type)s
       AND p.path_id = %(path_id)s
  """
  values = {
      "client_id": db_utils.ClientIDToInt(client_id),
      "path_type": int(path_type),
      "path_id": rdf_objects.PathID.FromComponents(components).AsBytes(),
      "timestamp": mysql_utils.RDFDatetimeToTimestamp(timestamp),
  }

  cursor.execute(query, values)
  row = cursor.fetchone()

  if row is None:
    raise db.UnknownPathError(
        client_id=client_id, path_type=path_type, components=components)

  # pyformat: disable
  (directory, timestamp,
   stat_entry_bytes, last_stat_entry_timestamp,
   hash_entry_bytes, last_hash_entry_timestamp) = row
  # pyformat: enable

  # Use the `...Bytes` serialization API for consistency with the rest of
  # this file (e.g. `MultiReadClientMetadata` uses `FromSerializedBytes`).
  if stat_entry_bytes is not None:
    stat_entry = rdf_client_fs.StatEntry.FromSerializedBytes(stat_entry_bytes)
  else:
    stat_entry = None

  if hash_entry_bytes is not None:
    hash_entry = rdf_crypto.Hash.FromSerializedBytes(hash_entry_bytes)
  else:
    hash_entry = None

  # Avoid shadowing the common `datetime` module name with the converter.
  to_datetime = mysql_utils.TimestampToRDFDatetime
  return rdf_objects.PathInfo(
      path_type=path_type,
      components=components,
      timestamp=to_datetime(timestamp),
      last_stat_entry_timestamp=to_datetime(last_stat_entry_timestamp),
      last_hash_entry_timestamp=to_datetime(last_hash_entry_timestamp),
      directory=directory,
      stat_entry=stat_entry,
      hash_entry=hash_entry)
def _MultiWritePathInfos(self, path_infos, cursor=None):
  """Writes a collection of path info records for specified clients.

  All rows are first bulk-inserted into a temporary table and then merged
  into `client_paths`, `client_path_stat_entries` and
  `client_path_hash_entries` with a handful of set-based statements.

  Args:
    path_infos: A dict mapping client id strings to iterables of
      `rdf_objects.PathInfo` records to write for that client.
    cursor: A database cursor (supplied by the connection decorator).
  """
  path_info_count = 0
  path_info_values = []

  parent_path_info_count = 0
  parent_path_info_values = []

  has_stat_entries = False
  has_hash_entries = False

  for client_id, client_path_infos in iteritems(path_infos):
    for path_info in client_path_infos:
      path = mysql_utils.ComponentsToPath(path_info.components)

      path_info_values.append(db_utils.ClientIDToInt(client_id))
      path_info_values.append(int(path_info.path_type))
      path_info_values.append(path_info.GetPathID().AsBytes())
      path_info_values.append(path)
      path_info_values.append(bool(path_info.directory))
      path_info_values.append(len(path_info.components))

      if path_info.HasField("stat_entry"):
        path_info_values.append(path_info.stat_entry.SerializeToString())
        has_stat_entries = True
      else:
        path_info_values.append(None)

      if path_info.HasField("hash_entry"):
        path_info_values.append(path_info.hash_entry.SerializeToString())
        path_info_values.append(path_info.hash_entry.sha256.AsBytes())
        has_hash_entries = True
      else:
        path_info_values.append(None)
        path_info_values.append(None)

      path_info_count += 1

      # TODO(hanuszczak): Implement a trie in order to avoid inserting
      # duplicated records.
      for parent_path_info in path_info.GetAncestors():
        path = mysql_utils.ComponentsToPath(parent_path_info.components)

        parent_path_info_values.append(db_utils.ClientIDToInt(client_id))
        parent_path_info_values.append(int(parent_path_info.path_type))
        parent_path_info_values.append(
            parent_path_info.GetPathID().AsBytes())
        parent_path_info_values.append(path)
        parent_path_info_values.append(len(parent_path_info.components))

        parent_path_info_count += 1

  with mysql_utils.TemporaryTable(
      cursor=cursor,
      name="client_path_infos",
      columns=[
          ("client_id", "BIGINT UNSIGNED NOT NULL"),
          ("path_type", "INT UNSIGNED NOT NULL"),
          ("path_id", "BINARY(32) NOT NULL"),
          ("path", "TEXT NOT NULL"),
          ("directory", "BOOLEAN NOT NULL"),
          ("depth", "INT NOT NULL"),
          ("stat_entry", "MEDIUMBLOB NULL"),
          ("hash_entry", "MEDIUMBLOB NULL"),
          ("sha256", "BINARY(32) NULL"),
          # The temporary table timestamps every row at insertion time with
          # microsecond precision; these timestamps are later propagated to
          # the persistent tables.
          ("timestamp", "TIMESTAMP(6) NOT NULL DEFAULT now(6)"),
      ]):
    if path_info_count > 0:
      query = """
        INSERT INTO client_path_infos(client_id, path_type, path_id,
                                      path, directory, depth,
                                      stat_entry, hash_entry, sha256)
        VALUES {}
      """.format(mysql_utils.Placeholders(num=9, values=path_info_count))
      cursor.execute(query, path_info_values)

      cursor.execute("""
        INSERT INTO client_paths(client_id, path_type, path_id, path,
                                 directory, depth)
             SELECT client_id, path_type, path_id, path, directory, depth
               FROM client_path_infos
        ON DUPLICATE KEY UPDATE
          client_paths.directory = (client_paths.directory OR VALUES(client_paths.directory)),
          client_paths.timestamp = now(6)
      """)

    if parent_path_info_count > 0:
      placeholders = ["(%s, %s, %s, %s, TRUE, %s)"] * parent_path_info_count
      # Fixed: use now(6) instead of now() so the TIMESTAMP(6) column keeps
      # microsecond precision, consistent with every other write above.
      cursor.execute(
          """
          INSERT INTO client_paths(client_id, path_type, path_id, path,
                                   directory, depth)
          VALUES {}
          ON DUPLICATE KEY UPDATE directory = TRUE, timestamp = now(6)
          """.format(", ".join(placeholders)), parent_path_info_values)

    if has_stat_entries:
      cursor.execute("""
        INSERT INTO client_path_stat_entries(client_id, path_type, path_id,
                                             stat_entry, timestamp)
             SELECT client_id, path_type, path_id, stat_entry, timestamp
               FROM client_path_infos
              WHERE stat_entry IS NOT NULL
      """)
      cursor.execute("""
        UPDATE client_paths, client_path_infos
           SET client_paths.last_stat_entry_timestamp = client_path_infos.timestamp
         WHERE client_paths.client_id = client_path_infos.client_id
           AND client_paths.path_type = client_path_infos.path_type
           AND client_paths.path_id = client_path_infos.path_id
           AND client_path_infos.stat_entry IS NOT NULL
      """)

    if has_hash_entries:
      cursor.execute("""
        INSERT INTO client_path_hash_entries(client_id, path_type, path_id,
                                             hash_entry, sha256, timestamp)
             SELECT client_id, path_type, path_id, hash_entry, sha256, timestamp
               FROM client_path_infos
              WHERE hash_entry IS NOT NULL
      """)
      cursor.execute("""
        UPDATE client_paths, client_path_infos
           SET client_paths.last_hash_entry_timestamp = client_path_infos.timestamp
         WHERE client_paths.client_id = client_path_infos.client_id
           AND client_paths.path_type = client_path_infos.path_type
           AND client_paths.path_id = client_path_infos.path_id
           AND client_path_infos.hash_entry IS NOT NULL
      """)
def ReadPathInfos(self, client_id, path_type, components_list, cursor=None):
  """Retrieves path info records for given paths.

  Args:
    client_id: A client id string for which the paths are read.
    path_type: A path type (an `rdf_objects.PathInfo.PathType` value).
    components_list: An iterable of component tuples, one per path.
    cursor: A database cursor (supplied by the connection decorator).

  Returns:
    A dict mapping each requested components tuple to an
    `rdf_objects.PathInfo` instance, or to None if the path is unknown.
  """
  if not components_list:
    return {}

  path_ids = list(map(rdf_objects.PathID.FromComponents, components_list))

  # Unknown paths keep their initial None value in the result.
  path_infos = {components: None for components in components_list}

  # The joins pull in the stat/hash entry whose timestamp matches the
  # last_*_timestamp recorded on the path row (i.e. the latest entry).
  query = """
    SELECT path, directory, UNIX_TIMESTAMP(client_paths.timestamp),
           stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
           hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths
 LEFT JOIN client_path_stat_entries ON
           (client_paths.client_id = client_path_stat_entries.client_id AND
            client_paths.path_type = client_path_stat_entries.path_type AND
            client_paths.path_id = client_path_stat_entries.path_id AND
            client_paths.last_stat_entry_timestamp = client_path_stat_entries.timestamp)
 LEFT JOIN client_path_hash_entries ON
           (client_paths.client_id = client_path_hash_entries.client_id AND
            client_paths.path_type = client_path_hash_entries.path_type AND
            client_paths.path_id = client_path_hash_entries.path_id AND
            client_paths.last_hash_entry_timestamp = client_path_hash_entries.timestamp)
     WHERE client_paths.client_id = %(client_id)s
       AND client_paths.path_type = %(path_type)s
       AND client_paths.path_id IN %(path_ids)s
  """
  values = {
      "client_id": db_utils.ClientIDToInt(client_id),
      "path_type": int(path_type),
      "path_ids": [path_id.AsBytes() for path_id in path_ids],
  }

  cursor.execute(query, values)
  for row in cursor.fetchall():
    # pyformat: disable
    (path, directory, timestamp,
     stat_entry_bytes, last_stat_entry_timestamp,
     hash_entry_bytes, last_hash_entry_timestamp) = row
    # pyformat: enable

    components = mysql_utils.PathToComponents(path)

    # Use the `...Bytes` serialization API for consistency with the rest of
    # this file (e.g. `MultiReadClientMetadata` uses `FromSerializedBytes`).
    if stat_entry_bytes is not None:
      stat_entry = rdf_client_fs.StatEntry.FromSerializedBytes(
          stat_entry_bytes)
    else:
      stat_entry = None

    if hash_entry_bytes is not None:
      hash_entry = rdf_crypto.Hash.FromSerializedBytes(hash_entry_bytes)
    else:
      hash_entry = None

    # Avoid shadowing the common `datetime` module name with the converter.
    to_datetime = mysql_utils.TimestampToRDFDatetime
    path_info = rdf_objects.PathInfo(
        path_type=path_type,
        components=components,
        timestamp=to_datetime(timestamp),
        last_stat_entry_timestamp=to_datetime(last_stat_entry_timestamp),
        last_hash_entry_timestamp=to_datetime(last_hash_entry_timestamp),
        directory=directory,
        stat_entry=stat_entry,
        hash_entry=hash_entry)

    path_infos[components] = path_info

  return path_infos
def _MultiWritePathInfos(self, path_infos, cursor=None):
  """Writes a collection of path info records for specified clients.

  Rows for `client_paths`, `client_path_stat_entries` and
  `client_path_hash_entries` are accumulated in memory first and then
  written with batched `executemany` statements, all sharing a single
  write timestamp.
  """
  write_time = rdfvalue.RDFDatetime.Now()
  # One conversion, reused for every row written in this call.
  write_ts = mysql_utils.RDFDatetimeToTimestamp(write_time)

  client_path_rows = []
  parent_rows = []

  stat_rows = []
  stat_keys = []  # Flat (client_id, path_type, path_id) triples.
  hash_rows = []
  hash_keys = []  # Flat (client_id, path_type, path_id) triples.

  for client_id, infos in iteritems(path_infos):
    client_id_int = db_utils.ClientIDToInt(client_id)

    for info in infos:
      key = (client_id_int, int(info.path_type), info.GetPathID().AsBytes())
      path = mysql_utils.ComponentsToPath(info.components)

      client_path_rows.append(
          key + (write_ts, path, bool(info.directory), len(info.components)))

      if info.HasField("stat_entry"):
        stat_keys.extend(key)
        stat_rows.append(key + (write_ts, info.stat_entry.SerializeToBytes()))

      if info.HasField("hash_entry"):
        hash_keys.extend(key)
        hash_rows.append(key + (write_ts,
                                info.hash_entry.SerializeToBytes(),
                                info.hash_entry.sha256.AsBytes()))

      # TODO(hanuszczak): Implement a trie in order to avoid inserting
      # duplicated records.
      for ancestor in info.GetAncestors():
        parent_rows.append((
            client_id_int,
            int(ancestor.path_type),
            ancestor.GetPathID().AsBytes(),
            mysql_utils.ComponentsToPath(ancestor.components),
            len(ancestor.components),
        ))

  if client_path_rows:
    cursor.executemany(
        """
        INSERT INTO client_paths(client_id, path_type, path_id, timestamp,
                                 path, directory, depth)
        VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s, %s, %s)
        ON DUPLICATE KEY UPDATE
          timestamp = VALUES(timestamp),
          directory = directory OR VALUES(directory)
        """, client_path_rows)

  if parent_rows:
    # Ancestor directories: mark as directory, refresh the timestamp.
    cursor.executemany(
        """
        INSERT INTO client_paths(client_id, path_type, path_id, path,
                                 directory, depth)
        VALUES (%s, %s, %s, %s, TRUE, %s)
        ON DUPLICATE KEY UPDATE directory = TRUE, timestamp = NOW(6)
        """, parent_rows)

  key_condition = "(client_id = %s AND path_type = %s AND path_id = %s)"

  if stat_rows:
    cursor.executemany(
        """
        INSERT INTO client_path_stat_entries(client_id, path_type, path_id,
                                             timestamp, stat_entry)
        VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s)
        """, stat_rows)

    update_query = """
      UPDATE client_paths
         SET last_stat_entry_timestamp = FROM_UNIXTIME(%s)
       WHERE {}
    """.format(" OR ".join([key_condition] * len(stat_rows)))
    cursor.execute(update_query, [write_ts] + stat_keys)

  if hash_rows:
    cursor.executemany(
        """
        INSERT INTO client_path_hash_entries(client_id, path_type, path_id,
                                             timestamp, hash_entry, sha256)
        VALUES (%s, %s, %s, FROM_UNIXTIME(%s), %s, %s)
        """, hash_rows)

    update_query = """
      UPDATE client_paths
         SET last_hash_entry_timestamp = FROM_UNIXTIME(%s)
       WHERE {}
    """.format(" OR ".join([key_condition] * len(hash_rows)))
    cursor.execute(update_query, [write_ts] + hash_keys)