def getOverLap(keyword, setups):
    """Build checkbox options for the dynamic values shared by BOTH selected configs.

    Args:
        keyword: name of the dynamic field (queried as ``dynamic_<keyword>``).
        setups: pair of selected config ids (passed to ``getConfigs``); may be
            None or empty when nothing is selected yet.

    Returns:
        Tuple ``(checkboxes, selected)`` — sorted checkbox dicts for the UI and
        the matching sorted default selection; ``([], [])`` if no setups given.
    """
    print(f'\n[CALLBACK] Checking Types for {keyword}')
    print('\tSETUPS', setups)
    if not setups:  # covers both None and empty list
        return [], []
    conf0, conf1 = getConfigs(setups)
    print(f'\tGetting {keyword} for:\n'
          f'\t{conf0.name}\n'
          f'\t{conf1.name}\n')
    # TODO: Watch out for keyword change, as this is a dynamic key
    opt0 = Result.objects(config=conf0).distinct(f'dynamic_{keyword}')
    opt1 = Result.objects(config=conf1).distinct(f'dynamic_{keyword}')
    # Sort the intersection once: the checkbox label equals the value, so
    # sorting by value is identical to the previous sort-by-label pass.
    overlap = sorted(set(opt0) & set(opt1))
    checkboxes = [{'label': value, 'value': value} for value in overlap]
    return checkboxes, overlap
def _retrieveDataAndBuildSpeedupTable(setups):
    """Aggregate results for two selected configs and build a speedup table.

    Args:
        setups: pair of selected config ids (passed to ``getConfigs``); may be
            None or empty when nothing is selected yet.

    Returns:
        ``[table_json, setups]`` with the speedup table serialized to JSON, or
        ``[None, None]`` when no setups are selected.
    """
    print('\n[CALLBACK] Retrieving Data')
    if setups is None or setups == []:
        return [None, None]
    conf0, conf1 = getConfigs(setups)
    # Get all results for both configs ('config_hashed' is an index hint)
    results0 = Result.objects(config=conf0).hint('config_hashed')
    results1 = Result.objects(config=conf1).hint('config_hashed')
    start = time.time()
    df0 = aggregate_results(results0)
    df1 = aggregate_results(results1)
    print(f'\tAggregated in {(time.time() - start)} seconds')

    def calculate_speedup(data0: pd.DataFrame, data1: pd.DataFrame) -> pd.DataFrame:
        """
        Return Dataframe containing all matched configs and the speedup

        Args:
            data0: aggregated result dataframe commit0
            data1: aggregated result dataframe commit1

        Returns:
            table: dataframe with speedup table
        """
        quantity = 'minTime'
        all_data1_configs = data1.drop(columns=quantity)  # Drop column for matching
        table = all_data1_configs.copy()
        for i_search in range(len(data0)):
            # Row i_search of data0 without the timing column
            search_config = data0.loc[i_search, data0.columns != quantity]
            # True for every data1 row whose full config matches the data0 row
            full_match = (all_data1_configs == search_config).all(axis=1)
            matches = full_match[full_match].index
            if len(matches) == 0:
                # BUGFIX: previously an unguarded `.index[0]` raised IndexError
                # when data1 had no matching config; skip unmatched rows instead.
                continue
            i_match = matches[0]  # Index of the full match in data1
            # BUGFIX: i_search indexes data0 and i_match indexes data1 — the
            # original used them swapped, dividing times of unrelated rows
            # whenever the two frames were ordered differently.
            speedup = data0.loc[i_search, quantity] / data1.loc[i_match, quantity]
            label = ''.join([f'{str(v):>10} ' for v in data1.loc[i_match, :].values])
            table.loc[i_match, 'quantity'] = data1.loc[i_match, quantity]
            table.loc[i_match, 'speedup'] = speedup
            table.loc[i_match, 'label'] = label
        return table

    speedupTable = calculate_speedup(df0, df1).sort_values('speedup')
    return [speedupTable.to_json(), setups]
def generatePlot(self):
    """
    Quick overview plot for commit

    Renders one horizontal bar chart (mean + min runtimes) per config of this
    commit, uploads each to Imgur, and stores link/delete-hash on the config.

    :return: True (always; failures are reported via updateStatus)
    """
    try:
        imgur = ImgurUploader()
        confs = Config.objects(commitSHA=self.sha)
        # Multiple Plots if more than one config was run
        conf: Config
        for conf in confs:
            results = Result.objects(config=conf)
            means = np.array([r.meanTime for r in results])
            mins = np.array([r.minTime for r in results])
            header, all_keys = get_dyn_keys(results)
            header_string = r'$\bf{' + header + '}$'
            labels = generate_label_table(results, all_keys)
            # Sort by minimum time (descending index order)
            sort_keys = np.argsort(mins)[::-1]
            sorted_means = means[sort_keys]
            sorted_mins = mins[sort_keys]
            sorted_labels = labels[sort_keys]
            # Bold header string rendered as an extra tick above the bars
            sorted_labels = np.append(sorted_labels, header_string)
            fig = plt.figure(figsize=(15, len(means) / 4))
            plt.gca().set_title(conf)
            plt.barh(np.arange(len(means)), sorted_means, label='mean')
            plt.barh(np.arange(len(means)), sorted_mins, label='min')
            plt.legend()
            plt.xlabel('nanoseconds')
            plt.xscale('log')
            plt.yticks(np.arange(len(sorted_labels)), sorted_labels)
            plt.grid(which='both', axis='x')
            plt.tight_layout()
            # Upload figure
            buf = io.BytesIO()
            fig.savefig(buf, format='png')
            # BUGFIX: close the figure — previously one figure leaked per config
            plt.close(fig)
            buf.seek(0)
            # renamed from `hash` to avoid shadowing the builtin
            link, delete_hash = imgur.upload(buf.read())
            conf.perfImgurLink = link
            conf.perfDeleteHash = delete_hash
            conf.save()
        # NOTE(review): `link` here is the one from the LAST config in the loop;
        # raises NameError (caught below) if no configs exist — confirm intended.
        self.updateStatus(1, "PLOTTING", "PLOTTING succeeded\n", link)
    except Exception as e:
        self.updateStatus(-1, "PLOTTING", f"PLOTTING failed\n{e}")
    os.chdir(self.baseDir)
    return True
def singleResults(setup):
    """Aggregate all results for a single config.

    Args:
        setup: id of the Config to load.

    Returns:
        ``[df_json, commit_label]`` — the aggregated results serialized to JSON
        and a short "sha: message" description of the commit.
    """
    print('\n[CALLBACK] Single Results')
    # Retrieve data ('config_hashed' is an index hint)
    t0 = time.time()
    conf = Config.objects().get(id=setup)
    result_set = Result.objects(config=conf).hint('config_hashed')
    frame = aggregate_results(result_set)
    print(f'\tAggregated singles in {(time.time() - t0)} seconds')
    commit_label = f'{conf.commitSHA[0:8]}: {conf.commitMessage}'
    return [frame.to_json(), commit_label]
def getOptions(keyword, setup):
    """Build checkbox options for all distinct ``dynamic_<keyword>`` values
    of one config.

    Args:
        keyword: name of the dynamic field to query.
        setup: id of the Config to load.

    Returns:
        Tuple ``(checkboxes, selected)`` — sorted checkbox dicts and the
        matching sorted default selection.
    """
    print(f'\n[CALLBACK] Checking Types for {keyword}')
    print('\tSETUPS', setup)
    conf = Config.objects().get(id=setup)
    opts = Result.objects(config=conf).distinct(f'dynamic_{keyword}')
    # Label equals value, so one sort of the values yields the same order as
    # sorting the checkbox dicts by label.
    ordered = sorted(opts)
    checkboxes = [{'label': val, 'value': val} for val in ordered]
    return checkboxes, ordered
def _aggregateResults(config, sliderDict, sliderPos):
    """Aggregate results for the base config and for every commit inside the
    slider range, matched on the same setup.

    Args:
        config: id of the base Config; None/empty short-circuits.
        sliderDict: JSON-serialized dataframe with a ``SHA`` column per commit.
        sliderPos: ``[lo, hi]`` slider indices selecting the comparison range.

    Returns:
        ``([base_json, comparison_jsons], [sliderPos, config])`` or
        ``(None, None)`` when no config is selected.
    """
    print('[CALLBACK] Getting Results')
    if config is None or config == []:
        return None, None
    t_start = time.time()
    slider_frame = pd.read_json(sliderDict)
    base_conf = Config.objects().get(id=config)
    base_df = aggregate_results(Result.objects(config=base_conf))
    comparison = []
    for idx in range(sliderPos[0] + 1, sliderPos[1] + 1):
        # Get Matching Config for other SHA
        sha = slider_frame.iloc[idx].SHA
        try:
            other_conf = Config.objects().get(commitSHA=sha, setup=base_conf.setup)
        except me.MultipleObjectsReturned:
            # More than one run for this commit/setup: take the newest
            other_conf = Config.objects(
                commitSHA=sha, setup=base_conf.setup).order_by('-id').first()
        except me.DoesNotExist:
            continue
        # Get Results
        frame = aggregate_results(Result.objects(config=other_conf))
        print(f'\t{len(frame)}')
        if len(frame) != 0:
            comparison.append(frame.to_json())
    print(f'\tAggregated all results: {time.time() - t_start} seconds')
    return [base_df.to_json(), comparison], [sliderPos, config]
def _dynFunction(setup):
    """Build checkbox options for the distinct dynamic values of one config.

    Args:
        setup: id of the Config to load; None/empty returns empty lists.

    Returns:
        Tuple ``(checkboxes, selected)`` — sorted checkbox dicts and the
        matching sorted default selection.
    """
    if setup is None or setup == []:
        return [], []
    conf = Config.objects.get(id=setup)
    # NOTE(review): `keyword` is a free variable here — presumably captured
    # from an enclosing factory/closure when this callback is created; confirm
    # it is always bound where _dynFunction is defined.
    options = Result.objects(config=conf).distinct(f'dynamic_{keyword}')
    checkboxes = []
    selected = []
    for value in options:
        checkboxes.append({'label': value, 'value': value})
        selected.append(value)
    # Label equals value, so both return values end up in the same order.
    return sorted(checkboxes, key=lambda c: c['label']), sorted(selected)
def parse_and_upload(self):
    """Parse the measurement run output and upload one Config plus one Result
    per tuning configuration to the database.

    Reads ``self.measure_output.stdout``, extracts every
    "Collected times for {<config>} : [<times>]" line, stores the config
    parameters as dynamic fields and the timing statistics on each Result.

    :return: tuple ``(success: bool, message: str)``
    """
    print("uploading", self.mdFlexDir)
    try:
        cpu = get_cpu_info()["brand"]
    except Exception as e:
        # Best effort only — CPU brand is informational metadata.
        # NOTE(review): newer py-cpuinfo versions renamed this key to
        # "brand_raw"; on those this always falls through to "N/A" — verify.
        print(f"Couldn't determine CPU brand: {e}")
        cpu = "N/A"
    # Naive UTC timestamp for this upload batch
    run_timestamp = datetime.utcnow()
    # One match per tuning config: group(1) = config string, group(2) = times
    coarse_pattern = re.compile(r'Collected times for\s+{(.*)}\s:\s\[(.*)\]')
    # Splits the config string into "key: value" pairs
    config_pattern = re.compile(r'([^,]+): ([^,]+)')
    # Extracts the individual integer timings
    times_pattern = re.compile(r'(\d+)')
    config_runs = coarse_pattern.findall(
        self.measure_output.stdout.decode('utf-8'))
    db_entry = Config()
    db_entry.name = 'performance via single tuning phase'  # TODO: Keep name field?
    db_entry.date = run_timestamp
    db_entry.commitSHA = self.sha
    db_entry.commitMessage = self.repo.commit(self.sha).message
    db_entry.commitDate = self.repo.commit(self.sha).authored_datetime
    db_entry.mergedBaseSHA = self.baseSHA
    # Assumes tests were run on this system
    db_entry.system = cpu
    # Saving Setup used in perf script
    db_entry.setup = self.perfSetup
    # TODO: Decide if uniqueness is enforced (Change spare in models to False)
    # db_entry.unique = db_entry.name + db_entry.commitSHA + db_entry.system + str(db_entry.date)
    # try:
    #     db_entry.save()
    # except NotUniqueError:
    #     print("Exact Configuration for system and commit + date already saved!")
    #     continue
    try:
        db_entry.save()
    except Exception as e:
        self.updateStatus(-1, "UPLOAD", str(e))
        return False, f'Upload of config to DB failed {e}'
    print(db_entry)
    for run in config_runs:
        results = Result()
        results.config = db_entry
        # Filter all config parameters
        config = config_pattern.findall(run[0])
        # Parsing output
        try:
            # Parsing Config keys and values
            for pair in config:
                key = pair[0].replace(' ', '')  # Replace spaces
                key = 'dynamic_' + key  # Adding prefix to clearly show dynamic field creation in DB
                quantity = pair[1].replace(' ', '')  # Replace spaces
                try:
                    # Try converting to float if appropriate
                    quantity = float(quantity)
                except ValueError:
                    pass  # non-numeric values stay as strings
                print(key, quantity)
                results[key] = quantity
            # Parsing times
            times = times_pattern.findall(run[1])
            times = [float(t) for t in times]
            results.measurements = times
            results.meanTime = np.mean(times)  # Mean running Time
            results.minTime = np.min(times)  # Min running Time
        except Exception as e:
            print(f'Parsing of measurement failed {e}')
            self.updateStatus(-1, "PARSING", str(e))
            return False, f'Parsing failed with {e}'
        try:
            results.save()
        except Exception as e:
            self.updateStatus(-1, "UPLOAD", str(e))
            return False, f'Upload of Result failed with {e}'
        print(results)
    os.chdir(self.baseDir)
    self.updateStatus(1, "UPLOAD", "RESULT UPLOAD succeeded\n")
    return True, 'Upload succeeded'
def _compareConfigs(self, base: Config, test: Config):
    """
    Given two configs, find all overlapping results and compare them

    Matches results on their shared dynamic_* fields, computes per-result
    min/mean speedups via self._compareResults, and renders a bar chart.

    :param base: PR Base SHA config
    :param test: Commit in PR to compare to base
    :return: tuple ``(fig, sorted_min_speedups, sorted_mean_speedups,
        missing_results_counter)``
    """
    # Use base as common denominator and look for results containing the keys in base
    baseResults = Result.objects(config=base)
    testResults = Result.objects(config=test)
    missing_results_counter = 0  # base results with no counterpart in test
    minSpeeds = []
    meanSpeeds = []
    matchedResults = []
    for baseRes in baseResults:
        # Build dynamic keys dict from the document's ordered field list
        dynamicFields = [
            key for key in baseRes.__dict__['_fields_ordered']
            if 'dynamic_' in key
        ]
        query = dict()
        for field in dynamicFields:
            query[field] = baseRes[field]
        # TODO: Change above to similar dict comprehension
        # dynamicQuery = {k: r0[k] for k in r0.__dict__['_fields_ordered'] if 'dynamic_' in k}
        # Get Results with matching settings (filter existing queryset)
        testRes = testResults.filter(**query)
        if len(testRes) == 0:
            missing_results_counter += 1
            continue
        testRes = testRes.order_by(
            '-_id').first()  # Get newest matching if there's more than one
        minSpeedup, meanSpeedup = self._compareResults(baseRes, testRes)
        minSpeeds.append(minSpeedup)
        meanSpeeds.append(meanSpeedup)
        matchedResults.append(testRes)
    header, all_keys = get_dyn_keys(matchedResults)
    # Bold header rendered as an extra y-tick above the bars
    header_string = r'$\bf{' + header + '}$'
    labels = generate_label_table(matchedResults, all_keys)
    # Ascending by min-speedup, so the best results end up on top of the chart
    sort_keys = np.argsort(minSpeeds)
    sorted_min_speedsup = np.array(minSpeeds)[sort_keys]
    sorted_mean_speedsup = np.array(meanSpeeds)[sort_keys]
    sorted_labels = labels[sort_keys]
    sorted_labels = np.append(sorted_labels, header_string)
    # Green = passing the perf threshold, red = regression
    colors = [
        'g' if speed >= CheckFlow.PERF_THRESHOLD else 'r'
        for speed in sorted_min_speedsup
    ]
    fig = plt.figure(figsize=(15, len(labels) / 4))
    plt.title('Speedup')
    plt.barh(np.arange(len(sort_keys)),
             sorted_min_speedsup,
             color=colors,
             alpha=.5,
             label='Speedup: minimum runtime')
    plt.barh(np.arange(len(sort_keys)),
             sorted_mean_speedsup,
             color='gray',
             alpha=.5,
             label='Speedup: mean runtime')
    plt.axvline(1, c='k', label='no change')
    plt.axvline(CheckFlow.PERF_THRESHOLD, c='r', label='passing threshold')
    plt.yticks(np.arange(len(sorted_labels)), sorted_labels)
    plt.legend(loc='lower right')
    plt.grid(which='both', axis='x')
    plt.xlim(0, 2)
    plt.tight_layout()
    plt.show()
    print(
        f"{missing_results_counter} not matched out of {len(baseResults)}")
    return fig, sorted_min_speedsup, sorted_mean_speedsup, missing_results_counter
password=os.environ['PASSWORD']) # New setups with more tuning-samples and changed rebuild frequency homoID = '5f44050def458403b65f97fa' imhomoID = '5f44050def458403b65f97f9' sha = '20382287f7f3d1ff2aa8414891ea657245670c80' h**o = Setup.objects().get(id=homoID) inhomo = Setup.objects().get(id=imhomoID) for s_name, setup in zip(['h**o', 'inhomo'], [h**o, inhomo]): configs = Config.objects(setup=setup, commitSHA=sha) # TODO: Remove limit here for conf in configs: results = list(Result.objects(config=conf)) # [:10] data = [] labels = [] for res in results: res: Result resDict = res.__dict__ keys = [ k for k in resDict.keys() if 'dynamic' in k and '_dynamic_lock' not in k ] labels.append(''.join([f'{str(resDict[k])} ' for k in keys])) data.append(res.measurements) data = np.array(data) # TODO: Watch out for rebuild freq change rebuild_freq = 4