class PerformanceTools: def __init__(self, options={}): prefix = os.path.join(os.environ.get('PREFIX', "")) perfdir = f"{prefix}/var/log/dirsrv/perfdir" print(f"Results and logs are stored in {perfdir} directory.") self._options = { 'nbUsers': 10000, 'seed': 'lib389PerfTools', 'resultDir': perfdir, 'suffix': DEFAULT_SUFFIX, **options } seed(self._options['seed']) self._instance = None os.makedirs(perfdir, mode=0o755, exist_ok=True) self._ldclt_template = self.getFilePath("template.ldclt") # Generate a dummy template anyway we do not plan to create entries with open(self._ldclt_template, "w") as f: f.write("objectclass: inetOrgPerson\n") self._users_parents_dn = f"ou=People,{self._options['suffix']}" @staticmethod def log2Csv(fname, fout): # Convert (verbose) log file into csv file (easier for comparing the results) map = {} # ( nb_users, name, nbthreads, db_lib) Tuple to Result map names = {} # { Name : None } Map has_threads = {} # { Name : { threads : { users : users } } } Map # Read log file maxmes = 0 with open(fname) as f: for line in f: if (line[0] != '{'): continue if (line[-1] == '\n'): line = line[:-1] res = eval(line.replace('\n', '\\n')) nb_users = res['nbUsers'] db_lib = res['db_lib'] name = res['measure_name'] names[name] = None try: nbthreads = res['nb_threads'] except KeyError: nbthreads = "" if not name in has_threads: has_threads[name] = {} if not nbthreads in has_threads[name]: has_threads[name][nbthreads] = {} has_threads[name][nbthreads][nb_users] = nb_users key = (nb_users, name, nbthreads, db_lib) if not key in map: map[key] = [] m = map[key] m.append(res) if maxmes < len(m): maxmes = len(m) # Displays the result: by test name then by thread number then by users number # Generates all combinations keys = [] for name in sorted(names.keys()): for t in sorted(has_threads[name].keys()): for user in sorted(has_threads[name][t].keys()): keys.append((user, name, t)) #Generates the csv file sep = ";" with CsvFile(fout, 2 * maxmes + 2) as csv: csv.nf("test name") csv.nf("threads") csv.nf("users") for idx in range(maxmes): csv.nf("bdb") csv.nf("mdb") csv.nf("%") csv.nl() for k in keys: csv.nf(f"{k[1]}") csv.nf(f"{k[2]}") csv.nf(f"{k[0]}") k0 = (k[0], k[1], k[2], "bdb") k1 = (k[0], k[1], k[2], "mdb") for idx in range(maxmes): if k0 in map and idx < len(map[k0]): res = map[k0][idx] csv.nf(res['safemean']) else: csv.nf(None) if k1 in map and idx < len(map[k1]): res = map[k1][idx] csv.nf(res['safemean']) else: csv.nf(None) # Add % formula csv.nf(f"=({csv.ref(-1)}-{csv.ref(-2)})/{csv.ref(-2)}") csv.nl() def getFilePath(self, filename): return os.path.join(self._options['resultDir'], filename) def log(self, filename, msg): with open(self.getFilePath(filename), "at") as f: f.write(str(msg)) f.write("\n") def initInstance(self): if (self._instance): return self._instance uidpath = self.getFilePath("uids") nb_uids = 0 try: with open(uidpath, 'r') as f: while f.readline(): nb_uids += 1 except FileNotFoundError: pass nb_users = self._options['nbUsers'] need_rebuild = True if (nb_uids == nb_users): # Lets try to reuse existing instance try: self._instance = DirSrv(verbose=True) self._instance.local_simple_allocate(serverid="standalone1", password=PW_DM) self._instance.open() if (self._instance.exists()): if (self._instance.get_db_lib() == get_default_db_lib()): need_rebuild = False else: print( f"db is {self._instance.get_db_lib()} instead of {get_default_db_lib()} ==> instance must be rebuild" ) else: print(f"missing instance ==> instance must be rebuild") except Exception: pass else: print( f"Instance has {nb_uids} users instead of {nb_users} ==> instance must be rebuild" ) if (need_rebuild): print("Rebuilding standalone1 instance") # Should rebuild the instance from scratch topology = create_topology({ReplicaRole.STANDALONE: 1}) self._instance = topology.standalone # Adjust db size if needed (i.e about 670 K users) defaultDBsize = 1073741824 entrySize = 1600 # Real size is around 1525 if (self._instance.get_db_lib() == "mdb" and nb_users * entrySize > defaultDBsize): mdb_config = LMDB_LDBMConfig(self._instance) mdb_config.replace("nsslapd-mdb-max-size", str(nb_users * entrySize)) self._instance.restart() # Then populate the users useraccounts = UserAccounts(self._instance, self._options['suffix']) with open(uidpath, 'w') as f: uidgen = IdGeneratorWithNumbers(nb_users) cnGen = IdGeneratorWithNames(100) snGen = IdGeneratorWithNames(100) for uid in uidgen: cn = cnGen.random() sn = snGen.random() rdn = f"uid={uid}" osuid = uidgen.getIdx() + 1000 osgid = int(osuid % 100) + 1000 properties = { 'uid': uid, 'cn': cn, 'sn': sn, 'uidNumber': str(osuid), 'gidNumber': str(osgid), 'homeDirectory': f'/home/{uid}' } super(UserAccounts, useraccounts).create(rdn, properties) f.write(f'{uid}\n') return self._instance @staticmethod def filterMeasures(values, m, ecart): # keep values around m r = [] for val in values: if (val > (1 - ecart) * m and val < (1 + ecart) * m): r.append(val) return r def safeMeasures(self, values, ecart=0.2): v = values try: r = PerformanceTools.filterMeasures(values, statistics.mean(v), ecart) while (r != v): v = r r = PerformanceTools.filterMeasures(values, statistics.mean(v), ecart) if (len(r) == 0): return values return r except statistics.StatisticsError as e: self.log("log", str(e)) print(e) return values # Return a dict about the evironment data def getEnvInfo(self): mem = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES') / (1024. **3) with open('/etc/redhat-release') as f: release = f.read() return { "db_lib": self._instance.get_db_lib(), "nb_cpus": multiprocessing.cpu_count(), "total mem": mem, "release": str(release), **self._options } def finalizeResult(self, res): try: rawres = res["rawresults"] res["rawmean"] = statistics.mean(rawres) res["saferesults"] = self.safeMeasures( rawres) # discard first measure result res["safemean"] = statistics.mean(res["saferesults"]) pretty_res_keys = [ 'start_time', 'stop_time', 'measure_name', 'safemean', 'db_lib', 'nbUsers', 'nb_threads' ] pretty_res = dict( filter(lambda elem: elem[0] in pretty_res_keys, res.items())) except statistics.StatisticsError as e: print(e) res["exception"] = e pretty_res = "#ERROR" res["pretty"] = pretty_res self.log("out", res["pretty"]) self.log("log", res) return res def ldclt(self, measure_name, args, nbThreads=10, nbMes=10): # First ldclt measure is always bad so do 1 measure more # and discard it from final result nbMes += 1 prog = os.path.join(self._instance.ds_paths.bin_dir, 'ldclt') cmd = [ prog, '-h', f'{self._instance.host}', '-p', f'{self._instance.port}', '-D', f'{self._instance.binddn}', '-w', f'{self._instance.bindpw}', '-N', str(nbMes), '-n', str(nbThreads) ] for key in args.keys(): cmd.append(str(key)) val = args[key] if (val): cmd.append(str(val)) start_time = time.time() tmout = 30 + 10 * nbMes print(f"Running ldclt with a timeout of {tmout} seconds ...\r") try: result = subprocess.run(args=cmd, capture_output=True, timeout=tmout) except subprocess.CalledProcessError as e: self.log( "log", f'{e.cmd} failed. measure: {measure_name}\n' + f'instance: {self._instance.serverid}\n' + f'return code is {e.returncode}.\n' + f'stdout: {e.stdout}\n' + f'stderr: {e.stderr}\n') raise e print(" Done.") stop_time = time.time() # Lets parse the result res = { "measure_name": measure_name, "cmd": cmd, "stdout": result.stdout, "stderr": result.stderr, "returncode": result.returncode, "start_time": start_time, "stop_time": stop_time, "stop_time": stop_time, "nb_threads": nbThreads, **self.getEnvInfo() } rawres = re.findall(r'Average rate: [^ ]*\s*.([^/]*)', str(result.stdout)) rawres = [float(i) for i in rawres] res["measure0"] = rawres[0] res["rawresults"] = rawres[1:] # Discard first measure return self.finalizeResult(res) def measure_search_by_uid(self, name, nb_threads=1): nb_users = self._options['nbUsers'] args = { "-b": self._users_parents_dn, "-f": "uid=XXXXXXXXXX", "-e": "esearch,random", "-r0": None, f"-R{nb_users-1}": None } return self.ldclt(name, args, nbThreads=nb_threads) # I wish I could make the base dn vary rather than use the dn in filter # but I did not find how to do that (the RDN trick as in modify # generates the same search than measure_search_by_uid test) def measure_search_by_filtering_the_dn(self, name, nb_threads=1): nb_users = self._options['nbUsers'] args = { "-b": self._users_parents_dn, "-f": "uid:dn:=XXXXXXXXXX", "-e": "esearch,random", "-r0": None, f"-R{nb_users-1}": None } return self.ldclt(name, args, nbThreads=nb_threads) def measure_modify(self, name, nb_threads=1): nb_users = self._options['nbUsers'] args = { "-b": self._users_parents_dn, "-e": f"rdn=uid:[RNDN(0;{nb_users-1};10)],object={self._ldclt_template},attreplace=sn: random modify XXXXX" } return self.ldclt(name, args, nbThreads=nb_threads) def offline_export(self): start_time = time.time() assert (self._instance.db2ldif(DEFAULT_BENAME, (self._options['suffix'], ), None, None, None, self._ldif)) stop_time = time.time() # Count entries in ldif file (if not already done) if not self._nbEntries: self._nbEntries = 0 with open(self._ldif) as f: for line in f: if (line.startswith("dn:")): self._nbEntries += 1 return self._nbEntries / (stop_time - start_time) def offline_import(self): start_time = time.time() assert (self._instance.ldif2db(DEFAULT_BENAME, None, None, None, self._ldif)) stop_time = time.time() return self._nbEntries / (stop_time - start_time) def _do_measure(self, measure_name, measure_cb, nbMes): # Perform non ldcltl measure # first_time = time.time() rawres = [] for m in range(nbMes): try: rawres.append(measure_cb()) stop_time = time.time() except AssertionError: continue last_time = time.time() # Lets parse the result res = { "measure_name": measure_name, "start_time": first_time, "stop_time": last_time, "nb_measures": nbMes, "rawresults": rawres, **self.getEnvInfo() } return self.finalizeResult(res) def mesure_export_import(self, nbMes=10): self._instance.stop() self._ldif = self.getFilePath("db.ldif") self._nbEntries = None res = [ self._do_measure("export", self.offline_export, nbMes), self._do_measure("import", self.offline_import, nbMes) ] self._instance.start() return res class Tester: # Basic tester (used to define ldclt tests) def __init__(self, name, description, method_name): self._base_name = name self._base_description = description self._method_name = method_name def name(self): return self._base_name def argsused(self): return ["nb_threads", "name"] def description(self): return self._base_description def run(self, perftools, args): args['name'] = self._base_name res = getattr(perftools, self._method_name)(self._base_name, nb_threads=args['nb_threads']) print(res['pretty']) @staticmethod def initTester(args): os.environ["NSSLAPD_DB_LIB"] = args['db_lib'] perftools = PerformanceTools(args) perftools.initInstance() return perftools class TesterImportExport(Tester): # A special tester for export/import def __init__(self): super().__init__( "export/import", "Measure export rate in entries per seconds then measure import rate.", None) def argsused(self): return [] def run(self, perftools, args=None): res = perftools.mesure_export_import() for r in res: print(r['pretty']) @staticmethod def listTests(): # List of test for which args.nb_threads is useful return { t.name(): t for t in [ PerformanceTools.Tester( "search_uid", "Measure number of searches per seconds using filter with random existing uid.", "measure_search_by_uid"), PerformanceTools.Tester( "search_uid_in_dn", "Measure number of searches per seconds using filter with random existing uid in dn (i.e: (uid:dn:uid_value)).", "measure_search_by_filtering_the_dn"), PerformanceTools.Tester( "modify_sn", "Measure number of modify per seconds replacing sn by random value on random entries.", "measure_modify"), PerformanceTools.TesterImportExport(), ] } @staticmethod def runAllTests(options): for users in (100, 1000, 10000, 100000, 1000000): for db in ('bdb', 'mdb'): perftools = PerformanceTools.Tester.initTester({ **options, 'nbUsers': users, 'db_lib': db }) for t in PerformanceTools.listTests().values(): if 'nb_threads' in t.argsused(): for nbthreads in (1, 4, 8): t.run(perftools, {"nb_threads": nbthreads}) else: t.run(perftools)