def _init_subtest_data(self, *, data, result, fields):
    """
    If there is no ID conflict, insert an initial subtest dictionary into
    the data dictionary and return a reference to it so the caller can
    modify it further as needed.
    """
    description = self._test_description(fields=fields)
    if not description:
        subtest_id = MongoDB.encode_key(fields[2])
        name = fields[2]
        number = None
    else:
        subtest_id = MongoDB.encode_key("{}:{}".format(
            fields[2], fields[3]))
        name = fields[2]
        number = fields[3]
    if subtest_id in data:
        raise XcTestHarnessLogParserException(
            "duplicate subtest ID: {}".format(subtest_id))
    data[subtest_id] = {'name': name,
                        'number': number,
                        'result': result,
                        'description': description}
    return data[subtest_id]
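# Illustration only (hypothetical values): after a successful call the
# caller's data dictionary holds an entry keyed by the MongoDB-safe encoding
# of "<test>:<number>" (or just "<test>" when there is no description):
#
#   data[MongoDB.encode_key('someTest:7')] = {
#       'name': 'someTest',
#       'number': '7',
#       'result': 'Pass',
#       'description': '<whatever _test_description() returned>'}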
def filenames(self, *, bnum, group_name=None):
    coverage = self._get_coverage_data(bnum=bnum)
    if not coverage:
        return None
    rawnames = []
    do_sort = False
    if group_name is not None and group_name != "All Files":
        rawnames = self.file_groups.expand(name=group_name)
    else:
        do_sort = True
        rawnames = sorted(coverage.keys())
    have_total = False
    # Reduce a URL to just a filename
    filenames = []
    for key in rawnames:
        url = MongoDB.decode_key(key)
        if url == 'Total':
            have_total = True
            continue
        fields = url.split('/')
        if len(fields) < 2:
            raise Exception("Incomprehensible: {}".format(url))
        filename = "{}/{}".format(fields[-2], fields[-1])
        if filename in filenames:
            raise Exception("Duplicate: {}".format(filename))
        filenames.append(filename)
    if do_sort:
        filenames.sort()
    if have_total:
        filenames.insert(0, "Total")
    return filenames
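# Sketch of the reduction above (the decoded URL is hypothetical):
#
#   MongoDB.decode_key(key)              -> "src/lib/libfoo/Bar.cpp"
#   "{}/{}".format(fields[-2], fields[-1]) -> "libfoo/Bar.cpp"
#
# The synthetic 'Total' entry, if present, is re-inserted at the front of the
# (optionally sorted) result.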
def builds_for_version(self, *, test_group, xce_version):
    key = MongoDB.encode_key("{}_XCE_{}_builds".format(
        test_group, xce_version))
    doc = self.meta.coll.find_one({'_id': key})
    if not doc:
        return None
    return doc.get('values', None)
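# Expected meta document shape (names and values are hypothetical), matching
# the '{}_XCE_{}_builds' keys that update_build() registers via
# _add_to_meta_set and that index_data() populates with $addToSet:
#
#   {'_id': MongoDB.encode_key('tpchTest_XCE_trunk_builds'),
#    'values': ['100', '101', '105']}
#
# builds_for_version(test_group='tpchTest', xce_version='trunk') would then
# return ['100', '101', '105'].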
def update_build(self, *, jbi, log, is_reparse=False, test_mode=False):
    job_name = jbi.job_name
    bnum = jbi.build_number
    try:
        dir_path = os.path.join(self.artifacts_root, job_name, bnum)
        self.logger.debug("path is {}".format(dir_path))
        results = UbmPerfResults(bnum=bnum, dir_path=dir_path)
    except UbmTestNoResultsError:
        return None
    data = results.index_data()
    self.logger.debug("data is {}".format(data))
    atms = []
    atms.append(('{}_builds'.format(UbmTestGroupName), bnum))
    atms.append(('test_groups', '{}'.format(UbmTestGroupName)))
    xce_branch = jbi.git_branches().get('XCE', None)
    if xce_branch:
        data['xce_version'] = xce_branch
        builds_key_sfx = MongoDB.encode_key(
            "XCE_{}_builds".format(xce_branch))
        atms.append(
            ('{}_XCE_branches'.format(UbmTestGroupName), xce_branch))
        atms.append(('{}_{}'.format(UbmTestGroupName, builds_key_sfx), bnum))
    if atms:
        data['_add_to_meta_set'] = atms
    return data
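# Sketch (hypothetical branch and build number) of the tuples accumulated in
# atms above.  Each (key, value) pair asks index_data() to $addToSet the
# value into the 'values' set of the named meta document (keys shown before
# MongoDB.encode_key() encoding):
#
#   [('<UbmTestGroupName>_builds', '123'),
#    ('test_groups', '<UbmTestGroupName>'),
#    ('<UbmTestGroupName>_XCE_branches', 'trunk'),
#    ('<UbmTestGroupName>_XCE_trunk_builds', '123')]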
def branches(self, *, repo):
    # Return all known branches for the repo
    key = MongoDB.encode_key('{}_branches'.format(repo))
    doc = self.coll.find_one({'_id': key})
    if not doc:
        return []
    return list(doc.get('branches', []))
def _store_cur_core(self, *, cores, cur_core):
    # Trim off any path prefix
    corefile_name = cur_core.get('corefile_name')
    if '/' in corefile_name:
        cur_core['corefile_name'] = corefile_name.split('/')[-1]
    key = MongoDB.encode_key(cur_core.get('corefile_name'))
    cores[key] = cur_core
def filenames(self, *, bnum, group_name=None):
    coverage = self._get_coverage_data(bnum=bnum)
    if not coverage:
        return None
    rawnames = []
    do_sort = False
    if group_name is not None and group_name != "All Files":
        rawnames = self.file_groups.expand(name=group_name)
    else:
        # Load all file names available in coverage
        do_sort = True
        rawnames = coverage.keys()
    # Reduce to just final two path components
    filenames = []
    have_total = False
    for key in rawnames:
        name = MongoDB.decode_key(key)
        if name == 'totals':
            have_total = True
            continue
        fields = name.split('/')
        if len(fields) < 2:
            raise Exception("Incomprehensible: {}".format(name))
        filename = "{}/{}".format(fields[-2], fields[-1])
        if filename in filenames:
            raise Exception("Duplicate: {}".format(filename))
        filenames.append(filename)
    if do_sort:
        filenames.sort()
    if have_total:
        filenames.insert(0, "Total")
    return filenames
def coverage(self, *, bnum, filename):
    coverage = self._get_coverage_data(bnum=bnum)
    if not coverage:
        return None
    for key, data in coverage.items():
        url = MongoDB.decode_key(key)
        if filename.lower() in url.lower():
            return coverage[key].get('covered_pct', None)
    return None
def coverage(self, *, bnum, filename):
    """
    XXXrs - FUTURE - extend to return other than "lines" percentage.
    """
    if filename == "Total":
        filename = "totals"
    coverage = self._get_coverage_data(bnum=bnum)
    if not coverage:
        return None
    for key, data in coverage.items():
        name = MongoDB.decode_key(key)
        if filename in name:
            return coverage[key].get('lines', {}).get('percent', None)
    return None
def find_builds(self, *, repo=None, branches=None,
                first_bnum=None, last_bnum=None, reverse=False):
    """
    Return a list (possibly empty) of build numbers matching the given
    attributes.
    """
    if branches and not repo:
        raise ValueError("branches requires repo")
    # n.b. repo without branches is a no-op

    all_builds = self.all_builds()
    if not all_builds:
        return []
    all_builds = sorted(all_builds, key=nat_sort)

    # If only one end of the range was given, default the other end to the
    # first/last known build.
    if (first_bnum or last_bnum) and not (first_bnum and last_bnum):
        if not first_bnum:
            first_bnum = all_builds[0]
        if not last_bnum:
            last_bnum = all_builds[-1]

    build_range = None
    if first_bnum:
        build_range = set([str(b) for b in
                           range(int(first_bnum), int(last_bnum) + 1)])

    avail_builds = set()
    if repo:
        # Just those matching repo/branch
        for branch in branches:
            key = MongoDB.encode_key("{}_{}_builds".format(repo, branch))
            doc = self.coll.find_one({'_id': key})
            if not doc:
                continue
            avail_builds.update(doc.get('builds', []))
    else:
        avail_builds.update(all_builds)

    # If our build range is limited, intersect...
    if build_range:
        build_list = list(avail_builds.intersection(build_range))
    else:
        build_list = list(avail_builds)

    return sorted(build_list, key=nat_sort, reverse=reverse)
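# Hypothetical usage sketch (the instance name and argument values are
# illustrative, not taken from the codebase):
#
#   builds = meta.find_builds(repo='XCE',
#                             branches=['trunk'],
#                             first_bnum='100',
#                             reverse=True)
#   # -> e.g. ['163', '150', '121', '104'], newest first by natural sort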
def update_build(self, *, jbi, log, is_reparse=False, test_mode=False):
    """
    Return coverage info for a specific build.
    """
    try:
        bnum = jbi.build_number
        path = os.path.join(self.artifacts_root, bnum,
                            self.coverage_file_name)
        self.logger.debug("path: {}".format(path))
        xdutc = XDUnitTestCoverage(path=path)
        data = {}
        for url, coverage in xdutc.get_data().items():
            self.logger.debug("url: {} coverage: {}".format(url, coverage))
            data[MongoDB.encode_key(url)] = coverage
        return {'coverage': data}
    except FileNotFoundError as e:
        self.logger.error("{} not found".format(path))
        return None
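# Shape of the value returned above (URL and numbers are hypothetical; other
# per-file fields elided).  The per-file dictionaries are whatever
# XDUnitTestCoverage reports, and coverage() reads their 'covered_pct' field:
#
#   {'coverage': {MongoDB.encode_key('<url>/libfoo/Bar.js'):
#                     {'covered_pct': 87.5}}}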
def update_build(self, *, jbi, log, is_reparse=False, test_mode=False):
    job_name = jbi.job_name
    bnum = jbi.build_number
    try:
        dir_path = os.path.join(self.artifacts_root, job_name, bnum)
        results = SqlPerfResults(bnum=bnum, dir_path=dir_path,
                                 file_pats=self.file_pats)
    except SqlPerfNoResultsError as e:
        return None
    data = results.index_data()
    atms = []
    tpch = False
    tpcds = False
    if 'tpchTest' in data:
        tpch = True
        atms.append(('tpchTest_builds', bnum))
        atms.append(('test_groups', 'tpchTest'))
    if 'tpcdsTest' in data:
        tpcds = True
        atms.append(('tpcdsTest_builds', bnum))
        atms.append(('test_groups', 'tpcdsTest'))
    xce_branch = jbi.git_branches().get('XCE', None)
    if xce_branch:
        data['xce_version'] = xce_branch
        builds_key_sfx = MongoDB.encode_key(
            "XCE_{}_builds".format(xce_branch))
        if tpch:
            atms.append(('tpchTest_XCE_branches', xce_branch))
            atms.append(('tpchTest_{}'.format(builds_key_sfx), bnum))
        if tpcds:
            atms.append(('tpcdsTest_XCE_branches', xce_branch))
            atms.append(('tpcdsTest_{}'.format(builds_key_sfx), bnum))
    if atms:
        data['_add_to_meta_set'] = atms
    return data
def update_build(self, *, jbi, log, is_reparse=False, test_mode=False):
    bnum = jbi.build_number
    dir_path = os.path.join(self.artifacts_root, bnum)
    coverage_dir = ClangCoverageDir(coverage_dir=dir_path)
    try:
        coverage_dir.process()
    except Exception:
        self.logger.exception("exception processing: {}".format(dir_path))

    # Read the coverage.json file and convert to our preferred index form,
    # filtering for only files of interest (plus totals).
    coverage_file_path = os.path.join(self.artifacts_root, bnum,
                                      self.coverage_file_name)
    try:
        summaries = ClangCoverageFile(
            path=coverage_file_path).file_summaries()
    except FileNotFoundError:
        self.logger.exception(
            "file not found: {}".format(coverage_file_path))
        return None
    except ClangCoverageEmptyFile:
        self.logger.exception(
            "file is empty: {}".format(coverage_file_path))
        return None
    except Exception:
        self.logger.exception(
            "exception loading: {}".format(coverage_file_path))
        raise

    data = {}
    for filename, summary in summaries.items():
        data.setdefault('coverage', {})[MongoDB.encode_key(filename)] = summary
    return data
def _do_update_build(self, *, jbi, log, is_reparse=False, test_mode=False):
    """
    Parse the log for sub-test info.
    """
    self.start_time_ms = jbi.start_time_ms()
    self.duration_ms = jbi.duration_ms()
    past_start_marker = False
    past_durations_marker = False
    subtest_data = {}
    for lnum, line in enumerate(log.splitlines()):
        '''
        3339.573 ============================= test session starts ==============================
        '''
        if not past_start_marker and "=== test session starts ===" in line:
            past_start_marker = True
            continue
        if not past_start_marker:
            continue

        '''
        5931.514 ========================== slowest 10 test durations ===========================
        '''
        if "test durations ======" in line:
            past_durations_marker = True
            continue

        fields = line.split()
        if len(fields) < 3:
            continue

        '''
        5931.515 = 279 passed, 190 skipped, 1 deselected, 4 xfailed, 3 warnings in 2591.94s (0:43:11) =
        '''
        if past_durations_marker and fields[1][0] == '=' and fields[-1][-1] == '=':
            past_start_marker = False
            past_durations_marker = False
            continue

        '''
        5931.515 251.63s call test_udf.py::TestUdf::testSharedUdfSanity
        5931.515 162.94s call test_operators.py::TestOperators::testAddManyColumns
        '''
        if past_durations_marker:
            # duration parsing
            if fields[2] != "call":
                continue
            duration_ms = int(float(fields[1][:-1]) * 1000)
            subtest_id = MongoDB.encode_key(" ".join(fields[3:]))
            # XXXrs - Gaah!
            #
            # The sub-test identifier emitted in the "durations" section can
            # differ from the identifier emitted when that sub-test completes.
            #
            # Apply some ghastly ad-hoc transforms as a best-effort to
            # get things to match up :/
            if subtest_id not in subtest_data:
                # Sometimes the "/" in a path gets doubled...
                subtest_id = subtest_id.replace("//", "/")
            if subtest_id not in subtest_data:
                # Sometimes a "more complete" path is emitted, trim it a bit
                # at a time...
                sid_fields = subtest_id.split('/')
                while len(sid_fields) > 1:
                    sid_fields.pop(0)
                    sid = "/".join(sid_fields)
                    if sid in subtest_data:
                        subtest_id = sid
                        break
            if subtest_id not in subtest_data:
                self.logger.error("LOG PARSE ERROR")
                self.logger.warn(
                    "subtest_id {} in durations but not seen before".
                    format(subtest_id))
                continue
            subtest_data[subtest_id]['duration_ms'] = duration_ms

        # We're looking at test completion lines like:
        """
        3352.142 test_export.py::TestExport::testCombinations[table0-Default-csv-createRule0-splitRule3-every] SKIPPED [ 4%]
        7521.393 io/test_csv.py::test_csv_parser[Easy_sanity_test-schemaFile] XFAIL [ 31%]
        7613.720 io/test_csv.py::test_csv_parser[zero_length_fields-loadInputWithHeader] PASSED [ 35%]
        3714.433 test_operators.py::TestOperators::testSelectNoRowsAggregate PASSED [ 49%]
        10981.859 io/test_export.py::test_multiple_parquet_telecom_prefixed FAILED [ 98%]
        """
        result_idx = None
        for result in ['PASSED', 'FAILED', 'SKIPPED', 'XFAIL', 'XPASS']:
            if result in fields:
                result_idx = fields.index(result)
                break
        if result_idx is None:
            continue
        try:
            timestamp_ms = int(self.start_time_ms + (float(fields[0]) * 1000))
        except ValueError:
            self.logger.exception("timestamp parse error: {}".format(line))
            continue
        name = " ".join(fields[1:result_idx])
        subtest_id = MongoDB.encode_key(name)
        if not len(subtest_id):
            self.logger.error("LOG PARSE ERROR")
            self.logger.warn("missing subtest_id: {}".format(line))
            continue
        if subtest_id in subtest_data:
            raise PyTestLogParserException(
                "duplicate subtest ID '{}': {}".format(subtest_id, line))
        subtest_data[subtest_id] = {'name': name,
                                    'result': fields[result_idx],
                                    'end_time_ms': timestamp_ms}

    """
    NOTE FOR FUTURE
    Might care about these markers/signatures:

    11017.391 =================================== FAILURES ===================================
    11017.391 ____________________ test_multiple_parquet_telecom_prefixed ____________________
    11017.391 ...SNIP...
    11017.400 ----------------------------- Captured stdout call -----------------------------
    ...SNIP...
    11017.400 ---------- coverage: platform linux, python 3.6.11-final-0 -----------
    ... yadda yadda ...
    """
    return {'pytest_subtests': subtest_data}  # XXXrs can there be multiple in the same log?
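# Sketch of the structure returned above (test name and numbers are
# hypothetical).  'duration_ms' is only present when a line in the
# "slowest N test durations" section could be matched back to the
# corresponding completion line:
#
#   {'pytest_subtests': {
#        MongoDB.encode_key('io/test_csv.py::test_csv_parser[case]'): {
#            'name': 'io/test_csv.py::test_csv_parser[case]',
#            'result': 'PASSED',
#            'end_time_ms': 1600000000000,
#            'duration_ms': 251630}}}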
def _do_update_build(self, *, jbi, log, is_reparse=False, test_mode=False):
    """
    Parse the log for sub-test info.
    """
    self.start_time_ms = jbi.start_time_ms()
    self.duration_ms = jbi.duration_ms()
    subtest_data = {}
    cur_subtest = None
    for lnum, line in enumerate(log.splitlines()):
        fields = line.split()
        if len(fields) < 3:
            continue

        if cur_subtest is not None:
            if fields[1] == 'Error:':
                cur_subtest['result'] = "Error"
                cur_subtest['reason'] = " ".join(fields[3:])
                continue
            elif fields[1] == "SUBTEST_RESULT:" or fields[1] == "TESTCASE_RESULT:":
                cur_subtest['result'] = " ".join(fields[3:])
                continue
            else:
                # If field[1] is our subtest name assume fields[3:] is result
                name = fields[1][1:-1]
                if name == cur_subtest['name']:
                    cur_subtest['result'] = " ".join(fields[3:])  # XXXrs
                    continue

        if fields[1] == "SUBTEST_START:" or fields[1] == "TESTCASE_START:":
            if cur_subtest is not None:
                raise FuncTestLogParserException(
                    "nested TEST_START\n{}: {}".format(lnum, line))
            test_name = fields[2]
            test_id = MongoDB.encode_key(test_name)
            cur_subtest = {'name': test_name,
                           'id': test_id,
                           'start_time_ms': self._get_timestamp_ms(fields=fields)}
            continue

        if fields[1] == "SUBTEST_END:" or fields[1] == "TESTCASE_END:":
            if cur_subtest is None:
                raise FuncTestLogParserException(
                    "TEST_END before TEST_START\n{}: {}".format(lnum, line))
            if fields[2] != cur_subtest['name']:
                raise FuncTestLogParserException(
                    "unmatched TEST_END for {} while cur_subtest {}\n{}: {}"
                    .format(fields[2], cur_subtest, lnum, line))
            ts_ms = self._get_timestamp_ms(fields=fields)
            duration_ms = ts_ms - cur_subtest['start_time_ms']
            cur_subtest['duration_ms'] = duration_ms
            test_id = cur_subtest.pop('id')
            if test_id not in subtest_data:
                subtest_data[test_id] = {}
            iteration = len(subtest_data[test_id].keys()) + 1
            subtest_data[test_id][str(iteration)] = cur_subtest
            cur_subtest = None
            continue

        if cur_subtest is None:
            continue

        if fields[1] == "NumTests:":
            try:
                cnt = int(fields[2])
            except ValueError:
                raise FuncTestLogParserException(
                    "non-integer NumTests value\n{}: {}".format(lnum, line))
            if cnt > 1:
                raise FuncTestLogParserException(
                    "unexpected NumTests value\n{}: {}".format(lnum, line))
            if cnt == 0:
                cur_subtest['result'] = "Skip"  # XXXrs ?!?

    return {'functest_subtests': subtest_data}
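# Sketch of the structure returned above (names and times are hypothetical).
# Each sub-test is keyed by its encoded name, with one numbered entry per
# iteration seen in the log; an 'Error:' line adds a 'reason' field:
#
#   {'functest_subtests': {
#        MongoDB.encode_key('someFuncTest'): {
#            '1': {'name': 'someFuncTest',
#                  'start_time_ms': 1600000000000,
#                  'duration_ms': 42000,
#                  'result': 'Pass'}}}}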
if args.daily:
    cur_day = day
    cur_data = {}
    flush = False

job_name = binfo.pop('job_name')
jdc = get_job_data_collection(job_name=job_name)
build_number = binfo.pop('build_number')
build_data = jdc.get_data(bnum=build_number)
for sub in sub_blocks:
    if sub in build_data:
        if sub == 'analyzed_cores':
            fixed = {}
            for key, item in build_data[sub].items():
                key = MongoDB.decode_key(key)
                fixed[key] = item
            build_data[sub] = fixed
        cur_data.setdefault(sub, {}).setdefault(
            job_name, {})[build_number] = build_data.pop(sub)
cur_data.setdefault('builds', {}).setdefault(
    job_name, {})[build_number] = build_data

if cur_data:
    write_data(outdir=args.outdir, year=cur_year, month=cur_month,
               day=cur_day, data=cur_data)
def index_data(self, *, bnum, data, is_done, is_reparse):
    """
    Extract certain meta-data from the data set and "index" them.
    This is largely for the purpose of dashboard time efficiency.
    This may become obsolete when data are processed/indexed via Xcalar.

    is_reparse is here for consistency with other similar index/store
    methods, but is not presently used.
    """
    if is_done:
        self.logger.info("processing completed build {}:{}"
                         .format(self.job_name, bnum))
        # Add to all_builds list when complete
        self.coll.find_one_and_update({'_id': 'all_builds'},
                                      {'$addToSet': {'builds': bnum}},
                                      upsert=True)
    else:
        self.logger.info("processing incomplete build {}:{}"
                         .format(self.job_name, bnum))

    # Remove any retry entry
    self.cancel_retry(bnum=bnum)
    # Remove any reparse entry
    self.cancel_reparse(bnum=bnum)

    if not data:
        self.logger.error("empty data for {}:{}"
                          .format(self.job_name, bnum))
        return  # Nothing more to do.

    # If we have branch data, add to the builds-by-branch list(s)
    git_branches = data.get('git_branches', {})
    for repo, branch in git_branches.items():
        # Add repo to all repos list
        self.coll.find_one_and_update({'_id': 'all_repos'},
                                      {'$addToSet': {'repos': repo}},
                                      upsert=True)
        # Add branch to list of branches for the repo
        key = MongoDB.encode_key("{}_branches".format(repo))
        self.coll.find_one_and_update({'_id': key},
                                      {'$addToSet': {'branches': branch}},
                                      upsert=True)
        # Add build to the list of builds for the repo/branch pair
        key = MongoDB.encode_key("{}_{}_builds".format(repo, branch))
        self.coll.find_one_and_update({'_id': key},
                                      {'$addToSet': {'builds': bnum}},
                                      upsert=True)

    # _add_to_meta_set is a list of key/val pairs.  The key defines a
    # document, and the val will be added to the 'values' set in that
    # document iff it is not already present.
    add_to_meta_set = data.pop('_add_to_meta_set', [])
    for key, val in add_to_meta_set:
        self.coll.find_one_and_update({'_id': key},
                                      {'$addToSet': {'values': val}},
                                      upsert=True)
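# Resulting meta documents (IDs and values are hypothetical) after the
# updates above, one document per key, each accumulating a set via $addToSet:
#
#   {'_id': 'all_builds', 'builds': ['123', ...]}
#   {'_id': 'all_repos', 'repos': ['XCE', ...]}
#   {'_id': MongoDB.encode_key('XCE_branches'), 'branches': ['trunk', ...]}
#   {'_id': MongoDB.encode_key('XCE_trunk_builds'), 'builds': ['123', ...]}
#   {'_id': '<key from _add_to_meta_set>', 'values': ['<val>', ...]}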