def ReadPathInfos(self, client_id, path_type, components_list, cursor=None):
    """Retrieves path info records for given paths.

    Args:
      client_id: Identifier of the client the paths belong to. It is converted
        with `db_utils.ClientIDToInt` before being sent to the database.
      path_type: Type of the paths. It is sent to the database as
        `int(path_type)` and passed unchanged to every returned `PathInfo`.
      components_list: Sequence of path components to look up. Each element is
        used as a dictionary key, so it has to be hashable.
      cursor: Database cursor used to run the query. It is used
        unconditionally despite the `None` default (presumably injected by a
        decorator on this method -- verify against the enclosing class).

    Returns:
      A dictionary that initially maps every element of `components_list` to
      `None`. For every row found, the `PathInfo` built from that row is stored
      under the components parsed from the stored path. An empty dictionary is
      returned when `components_list` is empty.
    """
    if not components_list:
        return {}

    path_ids = [
        rdf_objects.PathID.FromComponents(components)
        for components in components_list
    ]
    path_infos = {components: None for components in components_list}

    # Emit one `%s` placeholder per path id instead of handing the whole list
    # to the driver as a single `IN %(path_ids)s` parameter: sequence arguments
    # to `cursor.execute` are not expanded reliably (broken in mysqlclient
    # 1.3.10, see https://github.com/PyMySQL/mysqlclient-python/issues/145).
    placeholders = ", ".join(["%s"] * len(path_ids))

    query = """
    SELECT path, directory, UNIX_TIMESTAMP(client_paths.timestamp),
           stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
           hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths
    LEFT JOIN client_path_stat_entries ON
              (client_paths.client_id = client_path_stat_entries.client_id AND
               client_paths.path_type = client_path_stat_entries.path_type AND
               client_paths.path_id = client_path_stat_entries.path_id AND
               client_paths.last_stat_entry_timestamp = client_path_stat_entries.timestamp)
    LEFT JOIN client_path_hash_entries ON
              (client_paths.client_id = client_path_hash_entries.client_id AND
               client_paths.path_type = client_path_hash_entries.path_type AND
               client_paths.path_id = client_path_hash_entries.path_id AND
               client_paths.last_hash_entry_timestamp = client_path_hash_entries.timestamp)
     WHERE client_paths.client_id = %s
       AND client_paths.path_type = %s
       AND client_paths.path_id IN ({})
    """.format(placeholders)

    # Positional parameters: client id, path type, then one entry per path id,
    # in the same order as the placeholders above.
    values = [db_utils.ClientIDToInt(client_id), int(path_type)]
    values.extend(path_id.AsBytes() for path_id in path_ids)

    cursor.execute(query, values)
    for row in cursor.fetchall():
        (path, directory, row_timestamp, stat_entry_bytes,
         last_stat_entry_timestamp, hash_entry_bytes,
         last_hash_entry_timestamp) = row

        components = mysql_utils.PathToComponents(path)

        # The LEFT JOINs yield NULL (None) when no stat/hash entry matched.
        if stat_entry_bytes is not None:
            stat_entry = rdf_client_fs.StatEntry.FromSerializedString(
                stat_entry_bytes)
        else:
            stat_entry = None

        if hash_entry_bytes is not None:
            hash_entry = rdf_crypto.Hash.FromSerializedString(hash_entry_bytes)
        else:
            hash_entry = None

        path_infos[components] = rdf_objects.PathInfo(
            path_type=path_type,
            components=components,
            timestamp=mysql_utils.TimestampToRDFDatetime(row_timestamp),
            last_stat_entry_timestamp=mysql_utils.TimestampToRDFDatetime(
                last_stat_entry_timestamp),
            last_hash_entry_timestamp=mysql_utils.TimestampToRDFDatetime(
                last_hash_entry_timestamp),
            directory=directory,
            stat_entry=stat_entry,
            hash_entry=hash_entry)

    return path_infos
def ListDescendentPathInfos(self, client_id, path_type, components, timestamp=None, max_depth=None, cursor=None):
    """Lists path info records that correspond to descendants of given path.

    Args:
      client_id: Identifier of the client; converted with
        `db_utils.ClientIDToInt` for the query.
      path_type: Type of the paths; sent to the database as `int(path_type)`
        and passed unchanged to every returned `PathInfo`.
      components: Components of the path whose descendants are listed.
      timestamp: If `None`, each path is joined with the stat and hash entries
        referenced by its `last_*_entry_timestamp` columns. Otherwise each path
        is joined with the newest stat and hash entries whose timestamp is not
        later than this value, and only explicit paths are returned (see the
        filtering step at the end of the function).
      max_depth: If not `None`, only rows with
        `depth <= len(components) + max_depth` are selected.
      cursor: Database cursor used to run the query. It is used
        unconditionally despite the `None` default (presumably injected by a
        decorator on this method -- verify against the enclosing class).

    Returns:
      A list of `PathInfo` objects sorted by their components.
    """
    base_path = mysql_utils.ComponentsToPath(components)
    params = {
        "client_id": db_utils.ClientIDToInt(client_id),
        "path_type": int(path_type),
        "path": db_utils.EscapeWildcards(base_path),
    }

    # The query is assembled from fragments that are concatenated in order.
    fragments = ["""
    SELECT path, directory, UNIX_TIMESTAMP(p.timestamp),
           stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
           hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths AS p
    """]

    only_explicit = timestamp is not None
    if not only_explicit:
        # No timestamp given: join with the entries the path row points at.
        fragments.append("""
        LEFT JOIN client_path_stat_entries AS s ON
                  (p.client_id = s.client_id AND
                   p.path_type = s.path_type AND
                   p.path_id = s.path_id AND
                   p.last_stat_entry_timestamp = s.timestamp)
        LEFT JOIN client_path_hash_entries AS h ON
                  (p.client_id = h.client_id AND
                   p.path_type = h.path_type AND
                   p.path_id = h.path_id AND
                   p.last_hash_entry_timestamp = h.timestamp)
        """)
    else:
        # Timestamp given: join with the newest stat/hash entry per path whose
        # timestamp does not exceed the requested one.
        fragments.append("""
        LEFT JOIN (SELECT sr.client_id, sr.path_type, sr.path_id, sr.stat_entry
                     FROM client_path_stat_entries AS sr
               INNER JOIN (SELECT client_id, path_type, path_id,
                                  MAX(timestamp) AS max_timestamp
                             FROM client_path_stat_entries
                            WHERE UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
                         GROUP BY client_id, path_type, path_id) AS st
                       ON sr.client_id = st.client_id
                      AND sr.path_type = st.path_type
                      AND sr.path_id = st.path_id
                      AND sr.timestamp = st.max_timestamp) AS s
               ON (p.client_id = s.client_id AND
                   p.path_type = s.path_type AND
                   p.path_id = s.path_id)
        LEFT JOIN (SELECT hr.client_id, hr.path_type, hr.path_id, hr.hash_entry
                     FROM client_path_hash_entries AS hr
               INNER JOIN (SELECT client_id, path_type, path_id,
                                  MAX(timestamp) AS max_timestamp
                             FROM client_path_hash_entries
                            WHERE UNIX_TIMESTAMP(timestamp) <= %(timestamp)s
                         GROUP BY client_id, path_type, path_id) AS ht
                       ON hr.client_id = ht.client_id
                      AND hr.path_type = ht.path_type
                      AND hr.path_id = ht.path_id
                      AND hr.timestamp = ht.max_timestamp) AS h
               ON (p.client_id = h.client_id AND
                   p.path_type = h.path_type AND
                   p.path_id = h.path_id)
        """)
        params["timestamp"] = mysql_utils.RDFDatetimeToTimestamp(timestamp)

    # `%%` is a literal `%` for the driver, so the pattern is `<path>/%`.
    fragments.append("""
    WHERE p.client_id = %(client_id)s
      AND p.path_type = %(path_type)s
      AND path LIKE concat(%(path)s, '/%%')
    """)

    if max_depth is not None:
        fragments.append("""
        AND depth <= %(depth)s
        """)
        params["depth"] = len(components) + max_depth

    cursor.execute("".join(fragments), params)

    found = []
    for (row_path, directory, row_timestamp, stat_entry_bytes,
         last_stat_entry_timestamp, hash_entry_bytes,
         last_hash_entry_timestamp) in cursor.fetchall():
        row_components = mysql_utils.PathToComponents(row_path)

        # The LEFT JOINs yield NULL (None) when no stat/hash entry matched.
        stat_entry = (
            rdf_client_fs.StatEntry.FromSerializedString(stat_entry_bytes)
            if stat_entry_bytes is not None else None)
        hash_entry = (
            rdf_crypto.Hash.FromSerializedString(hash_entry_bytes)
            if hash_entry_bytes is not None else None)

        found.append(
            rdf_objects.PathInfo(
                path_type=path_type,
                components=row_components,
                timestamp=mysql_utils.TimestampToRDFDatetime(row_timestamp),
                last_stat_entry_timestamp=mysql_utils.TimestampToRDFDatetime(
                    last_stat_entry_timestamp),
                last_hash_entry_timestamp=mysql_utils.TimestampToRDFDatetime(
                    last_hash_entry_timestamp),
                directory=directory,
                stat_entry=stat_entry,
                hash_entry=hash_entry))

    ordered = sorted(found, key=lambda info: tuple(info.components))

    if not only_explicit:
        return ordered

    # For a specific timestamp only explicit paths are reported: a path is kept
    # when it has a stat or hash entry itself, or when it is the parent of a
    # path that was kept. The list is sorted by components, so walking it
    # backwards visits deeper paths before their parents.
    kept = []
    parents_of_kept = set()
    for info in ordered[::-1]:
        key = tuple(info.components)
        is_explicit = (info.HasField("stat_entry") or
                       info.HasField("hash_entry") or
                       key in parents_of_kept)
        if not is_explicit:
            continue
        kept.append(info)
        parents_of_kept.add(key[:-1])

    # Paths were collected deepest-first; restore the sorted order.
    kept.reverse()
    return kept
def ReadPathInfos(self, client_id, path_type, components_list, cursor=None):
    """Retrieves path info records for given paths.

    Args:
      client_id: Identifier of the client the paths belong to; converted with
        `db_utils.ClientIDToInt` for the query.
      path_type: Type of the paths; sent to the database as `int(path_type)`
        and passed unchanged to every returned `PathInfo`.
      components_list: Sequence of path components to look up. Each element is
        used as a dictionary key, so it has to be hashable.
      cursor: Database cursor used to run the query. It is used
        unconditionally despite the `None` default (presumably injected by a
        decorator on this method -- verify against the enclosing class).

    Returns:
      A dictionary that initially maps every element of `components_list` to
      `None`. For every row found, the `PathInfo` built from that row is stored
      under the components parsed from the stored path. An empty dictionary is
      returned when `components_list` is empty.
    """
    if not components_list:
        return {}

    path_ids = [
        rdf_objects.PathID.FromComponents(item) for item in components_list
    ]
    results = dict.fromkeys(components_list)

    # One `%s` per path id: handing a tuple/list to `cursor.execute` as a
    # single parameter is broken in mysqlclient 1.3.10
    # (see https://github.com/PyMySQL/mysqlclient-python/issues/145).
    id_placeholders = ", ".join("%s" for _ in path_ids)

    query = """
    SELECT path, directory, UNIX_TIMESTAMP(client_paths.timestamp),
           stat_entry, UNIX_TIMESTAMP(last_stat_entry_timestamp),
           hash_entry, UNIX_TIMESTAMP(last_hash_entry_timestamp)
      FROM client_paths
    LEFT JOIN client_path_stat_entries ON
              (client_paths.client_id = client_path_stat_entries.client_id AND
               client_paths.path_type = client_path_stat_entries.path_type AND
               client_paths.path_id = client_path_stat_entries.path_id AND
               client_paths.last_stat_entry_timestamp = client_path_stat_entries.timestamp)
    LEFT JOIN client_path_hash_entries ON
              (client_paths.client_id = client_path_hash_entries.client_id AND
               client_paths.path_type = client_path_hash_entries.path_type AND
               client_paths.path_id = client_path_hash_entries.path_id AND
               client_paths.last_hash_entry_timestamp = client_path_hash_entries.timestamp)
     WHERE client_paths.client_id = %s
       AND client_paths.path_type = %s
       AND client_paths.path_id IN ({})
    """.format(id_placeholders)

    # Positional parameters: client id, path type, then every path id in the
    # same order as the placeholders above.
    args = [db_utils.ClientIDToInt(client_id), int(path_type)]
    args.extend(path_id.AsBytes() for path_id in path_ids)

    cursor.execute(query, args)
    for (path, directory, row_timestamp, stat_entry_bytes,
         last_stat_entry_timestamp, hash_entry_bytes,
         last_hash_entry_timestamp) in cursor.fetchall():
        row_components = mysql_utils.PathToComponents(path)

        # The LEFT JOINs yield NULL (None) when no stat/hash entry matched.
        stat_entry = (
            rdf_client_fs.StatEntry.FromSerializedBytes(stat_entry_bytes)
            if stat_entry_bytes is not None else None)
        hash_entry = (
            rdf_crypto.Hash.FromSerializedBytes(hash_entry_bytes)
            if hash_entry_bytes is not None else None)

        results[row_components] = rdf_objects.PathInfo(
            path_type=path_type,
            components=row_components,
            timestamp=mysql_utils.TimestampToRDFDatetime(row_timestamp),
            last_stat_entry_timestamp=mysql_utils.TimestampToRDFDatetime(
                last_stat_entry_timestamp),
            last_hash_entry_timestamp=mysql_utils.TimestampToRDFDatetime(
                last_hash_entry_timestamp),
            directory=directory,
            stat_entry=stat_entry,
            hash_entry=hash_entry)

    return results