class StatsScanner:
    """Harvest per-test metric summaries from cbmonitor and store them in Couchbase.

    Legacy implementation built on the Couchbase Python SDK 2.x ``Bucket`` API
    (``N1QLQuery`` / ``bucket.n1ql_query``).  Builds already processed are
    checkpointed in the same bucket, keyed by Jenkins build URL.
    """

    # Target bucket and credentials for the 'stats' store.
    COUCHBASE_BUCKET = 'stats'
    COUCHBASE_HOST = 'perflab.sc.couchbase.com'
    COUCHBASE_PASSWORD = '******'  # Yay!

    # Per-component count of collected metrics for one build ($1 = version).
    STATUS_QUERY = """
        SELECT component, COUNT(1) AS total
        FROM stats
        WHERE version = $1
        GROUP BY component
        ORDER BY component;
    """

    # cbmonitor snapshot names recorded for a Jenkins build ($1 = build URL).
    SNAPSHOT_QUERY = """
        SELECT RAW snapshots
        FROM benchmarks
        WHERE buildURL = $1;
    """

    def __init__(self):
        self.bucket = self.new_bucket()
        self.jenkins = JenkinsScanner()
        self.ps = PerfStore(host=CBMONITOR_HOST)
        self.weekly = Weekly()

    @property
    def connection_string(self) -> str:
        """SDK 2.x style connection string with the bucket and password inline."""
        return 'couchbase://{}/{}?password={}'.format(self.COUCHBASE_HOST,
                                                      self.COUCHBASE_BUCKET,
                                                      self.COUCHBASE_PASSWORD)

    def new_bucket(self) -> Bucket:
        """Open a new SDK 2.x bucket connection to the stats store."""
        return Bucket(connection_string=self.connection_string)

    @staticmethod
    def generate_key(attributes: dict) -> str:
        """Build a deterministic document key from the metric attributes.

        The optional bucket/server/index parts default to '' so keys from
        different stat scopes (cluster vs. bucket vs. server vs. index)
        stay unique without changing length conventions.
        """
        return ''.join((attributes['cluster'],
                        attributes['test_config'],
                        attributes['version'],
                        attributes['metric'],
                        attributes.get('bucket', ''),
                        attributes.get('server', ''),
                        attributes.get('index', '')))

    def store_metric_info(self, attributes: dict):
        """Upsert one metric-summary document keyed by generate_key()."""
        key = self.generate_key(attributes)
        self.bucket.upsert(key=key, value=attributes)

    def get_summary(self, db: str, metric: str) -> Optional[Dict[str, float]]:
        """Return the cbmonitor summary for a metric, or {} if it was never stored."""
        if self.ps.exists(db=db, metric=metric):
            return self.ps.get_summary(db=db, metric=metric)
        return {}

    def cluster_stats(self, cluster: str) -> Iterator[dict]:
        """Yield cluster-wide metric summaries for one cbmonitor snapshot."""
        m = MetadataClient(settings=StatsSettings(cluster, CBMONITOR_HOST))
        for metric in m.get_metrics():
            db = self.ps.build_dbname(cluster=cluster,
                                      collector=metric['collector'])
            summary = self.get_summary(db=db, metric=metric['name'])
            if summary:  # skip metrics with no stored data
                yield {
                    'metric': metric['name'],
                    'summary': summary,
                }

    def bucket_stats(self, cluster: str) -> Iterator[dict]:
        """Yield per-bucket metric summaries for one cbmonitor snapshot."""
        m = MetadataClient(settings=StatsSettings(cluster, CBMONITOR_HOST))
        for bucket in m.get_buckets():
            for metric in m.get_metrics(bucket=bucket):
                db = self.ps.build_dbname(cluster=cluster,
                                          collector=metric['collector'],
                                          bucket=bucket)
                summary = self.get_summary(db=db, metric=metric['name'])
                if summary:
                    yield {
                        'bucket': bucket,
                        'metric': metric['name'],
                        'summary': summary,
                    }

    def server_stats(self, cluster: str) -> Iterator[dict]:
        """Yield per-server metric summaries for one cbmonitor snapshot."""
        m = MetadataClient(settings=StatsSettings(cluster, CBMONITOR_HOST))
        for server in m.get_servers():
            for metric in m.get_metrics(server=server):
                db = self.ps.build_dbname(cluster=cluster,
                                          collector=metric['collector'],
                                          server=server)
                summary = self.get_summary(db=db, metric=metric['name'])
                if summary:
                    yield {
                        'metric': metric['name'],
                        'server': server,
                        'summary': summary,
                    }

    def index_stats(self, cluster: str) -> Iterator[dict]:
        """Yield per-index metric summaries for one cbmonitor snapshot."""
        m = MetadataClient(settings=StatsSettings(cluster, CBMONITOR_HOST))
        for index in m.get_indexes():
            for metric in m.get_metrics(index=index):
                db = self.ps.build_dbname(cluster=cluster,
                                          collector=metric['collector'],
                                          index=index)
                summary = self.get_summary(db=db, metric=metric['name'])
                if summary:
                    yield {
                        'index': index,
                        'metric': metric['name'],
                        'summary': summary,
                    }

    def find_snapshots(self, url: str) -> Iterator[dict]:
        """Yield cbmonitor snapshot names stored for a Jenkins build URL.

        SELECT RAW returns the snapshot lists themselves, hence the nested
        loop over each returned list.
        """
        n1ql_query = N1QLQuery(self.SNAPSHOT_QUERY, url)
        for snapshots in self.bucket.n1ql_query(n1ql_query):
            for snapshot in snapshots:
                yield snapshot

    def all_stats(self, url: str) -> Iterator[dict]:
        """Yield every cluster/bucket/server/index summary for a build URL."""
        for snapshot in self.find_snapshots(url=url):
            for stats in self.cluster_stats(cluster=snapshot):
                yield stats
            for stats in self.bucket_stats(cluster=snapshot):
                yield stats
            for stats in self.server_stats(cluster=snapshot):
                yield stats
            for stats in self.index_stats(cluster=snapshot):
                yield stats

    def get_checkpoint(self, url: str) -> Optional[dict]:
        """Return the checkpoint document for a build URL, or None if absent.

        Callers only test the result for ``is None``; a missing document
        means the build has not been scanned yet.
        """
        try:
            return self.bucket.get(url)
        except NotFoundError:
            return

    def add_checkpoint(self, url: str):
        """Mark a build URL as fully scanned (empty checkpoint document)."""
        self.bucket.insert(key=url, value={})
        logger.info('Added checkpoint for {}'.format(url))

    def find_metrics(self, version: str) -> Iterator[dict]:
        """Yield merged stats+build metadata for every unscanned build.

        The checkpoint is written only after all stats for a build have
        been yielded, so the generator must be consumed to completion for
        the checkpoint to be recorded.
        """
        for build in self.jenkins.find_builds(version=version):
            meta = {
                'version': version,
                'cluster': build['cluster'],
                'component': build['component'],
                'test_config': build['test_config'],
            }
            if self.get_checkpoint(build['url']) is None:
                for stats in self.all_stats(url=build['url']):
                    yield {**stats, **meta}
                self.add_checkpoint(build['url'])

    def run(self):
        """Scan all weekly builds and persist every discovered metric."""
        for build in self.weekly.builds:
            logger.info('Scanning stats from build {}'.format(build))
            for attributes in self.find_metrics(build):
                if attributes is not None:
                    self.store_metric_info(attributes)

    def update_status(self):
        """Push per-component collected-metric counts to the weekly report."""
        for build in self.weekly.builds:
            logger.info('Updating status of build {}'.format(build))
            n1ql_query = N1QLQuery(self.STATUS_QUERY, build)
            for status in self.bucket.n1ql_query(n1ql_query):
                status = {
                    'build': build,
                    'component': status['component'],
                    'metric_status': {
                        'collected': status['total'],
                    },
                }
                self.weekly.update_status(status)
class StatsScanner:
    """Harvest per-test metric summaries from cbmonitor and store them in Couchbase.

    Implementation built on the Couchbase Python SDK 3.x ``Cluster`` API
    (``PasswordAuthenticator`` / ``cluster.query``).  Builds already
    processed are checkpointed in the same bucket, keyed by build URL.
    """

    # Target bucket and credentials for the 'stats' store.
    COUCHBASE_BUCKET = 'stats'
    COUCHBASE_HOST = 'perflab.sc.couchbase.com'
    COUCHBASE_PASSWORD = '******'  # Yay!

    # Per-component count of collected metrics for one build ($1 = version).
    STATUS_QUERY = """
        SELECT component, COUNT(1) AS total
        FROM stats
        WHERE version = $1
        GROUP BY component
        ORDER BY component;
    """

    # cbmonitor snapshot names recorded for a Jenkins build ($1 = build URL).
    SNAPSHOT_QUERY = """
        SELECT RAW snapshots
        FROM benchmarks
        WHERE buildURL = $1;
    """

    def __init__(self):
        # Bucket name doubles as the username (RBAC user per bucket).
        pass_auth = PasswordAuthenticator(self.COUCHBASE_BUCKET,
                                          self.COUCHBASE_PASSWORD)
        options = ClusterOptions(authenticator=pass_auth)
        self.cluster = Cluster(connection_string=self.connection_string,
                               options=options)
        self.bucket = self.cluster.bucket(
            self.COUCHBASE_BUCKET).default_collection()
        self.jenkins = JenkinsScanner()
        self.ps = PerfStore(host=CBMONITOR_HOST)
        self.weekly = Weekly()

    @property
    def connection_string(self) -> str:
        """Connection string for the stats cluster.

        NOTE(review): authentication is already handled by the
        PasswordAuthenticator in __init__; the ``?password=`` option here
        looks redundant — confirm before removing it.
        """
        return 'couchbase://{}?password={}'.format(self.COUCHBASE_HOST,
                                                   self.COUCHBASE_PASSWORD)

    @staticmethod
    def generate_key(attributes: dict) -> str:
        """Build a deterministic document key from the metric attributes.

        The optional bucket/server/index parts default to '' so keys from
        different stat scopes stay unique.
        """
        return ''.join((attributes['cluster'],
                        attributes['test_config'],
                        attributes['version'],
                        attributes['metric'],
                        attributes.get('bucket', ''),
                        attributes.get('server', ''),
                        attributes.get('index', '')))

    def store_metric_info(self, attributes: dict):
        """Upsert one metric-summary document keyed by generate_key()."""
        key = self.generate_key(attributes)
        self.bucket.upsert(key=key, value=attributes)

    def get_summary(self, db: str, metric: str) -> Optional[Dict[str, float]]:
        """Return the cbmonitor summary for a metric, or {} if never stored."""
        if self.ps.exists(db=db, metric=metric):
            return self.ps.get_summary(db=db, metric=metric)
        return {}

    def cluster_stats(self, cluster: str) -> Iterator[dict]:
        """Yield cluster-wide metric summaries for one cbmonitor snapshot."""
        m = MetadataClient(settings=StatsSettings(cluster, CBMONITOR_HOST))
        for metric in m.get_metrics():
            db = self.ps.build_dbname(cluster=cluster,
                                      collector=metric['collector'])
            summary = self.get_summary(db=db, metric=metric['name'])
            if summary:  # skip metrics with no stored data
                yield {
                    'metric': metric['name'],
                    'summary': summary,
                }

    def bucket_stats(self, cluster: str) -> Iterator[dict]:
        """Yield per-bucket metric summaries for one cbmonitor snapshot."""
        m = MetadataClient(settings=StatsSettings(cluster, CBMONITOR_HOST))
        for bucket in m.get_buckets():
            for metric in m.get_metrics(bucket=bucket):
                db = self.ps.build_dbname(cluster=cluster,
                                          collector=metric['collector'],
                                          bucket=bucket)
                summary = self.get_summary(db=db, metric=metric['name'])
                if summary:
                    yield {
                        'bucket': bucket,
                        'metric': metric['name'],
                        'summary': summary,
                    }

    def server_stats(self, cluster: str) -> Iterator[dict]:
        """Yield per-server metric summaries for one cbmonitor snapshot."""
        m = MetadataClient(settings=StatsSettings(cluster, CBMONITOR_HOST))
        for server in m.get_servers():
            for metric in m.get_metrics(server=server):
                db = self.ps.build_dbname(cluster=cluster,
                                          collector=metric['collector'],
                                          server=server)
                summary = self.get_summary(db=db, metric=metric['name'])
                if summary:
                    yield {
                        'metric': metric['name'],
                        'server': server,
                        'summary': summary,
                    }

    def index_stats(self, cluster: str) -> Iterator[dict]:
        """Yield per-index metric summaries for one cbmonitor snapshot."""
        m = MetadataClient(settings=StatsSettings(cluster, CBMONITOR_HOST))
        for index in m.get_indexes():
            for metric in m.get_metrics(index=index):
                db = self.ps.build_dbname(cluster=cluster,
                                          collector=metric['collector'],
                                          index=index)
                summary = self.get_summary(db=db, metric=metric['name'])
                if summary:
                    yield {
                        'index': index,
                        'metric': metric['name'],
                        'summary': summary,
                    }

    def find_snapshots(self, url: str) -> Iterator[dict]:
        """Yield cbmonitor snapshot names stored for a Jenkins build URL.

        SELECT RAW returns the snapshot lists themselves, hence the nested
        loop over each returned list.
        """
        # FIX: positional_parameters must be a sequence of parameters, not a
        # bare string (N1QL "args" is a JSON array; a string would be sent
        # as-is or iterated character by character).
        for snapshots in self.cluster.query(
                self.SNAPSHOT_QUERY,
                QueryOptions(positional_parameters=[url])):
            for snapshot in snapshots:
                yield snapshot

    def all_stats(self, url: str) -> Iterator[dict]:
        """Yield every cluster/bucket/server/index summary for a build URL."""
        for snapshot in self.find_snapshots(url=url):
            for stats in self.cluster_stats(cluster=snapshot):
                yield stats
            for stats in self.bucket_stats(cluster=snapshot):
                yield stats
            for stats in self.server_stats(cluster=snapshot):
                yield stats
            for stats in self.index_stats(cluster=snapshot):
                yield stats

    def get_checkpoint(self, url: str) -> Optional[dict]:
        """Return the checkpoint document for a build URL, or None if absent.

        NOTE(review): the broad ``except Exception`` also swallows
        connectivity/auth errors (logged at info level), which makes any
        failure look like "not scanned yet" — consider narrowing to the
        SDK's document-not-found exception.
        """
        try:
            return self.bucket.get(url).content
        except Exception as ex:
            logger.info(ex)
            return

    def add_checkpoint(self, url: str):
        """Mark a build URL as fully scanned (empty checkpoint document)."""
        self.bucket.insert(key=url, value={})
        logger.info('Added checkpoint for {}'.format(url))

    def find_metrics(self, version: str) -> Iterator[dict]:
        """Yield merged stats+build metadata for every unscanned build.

        The checkpoint is written only after all stats for a build have
        been yielded, so the generator must be consumed to completion for
        the checkpoint to be recorded.
        """
        for build in self.jenkins.find_builds(version=version):
            meta = {
                'version': version,
                'cluster': build['cluster'],
                'component': build['component'],
                'test_config': build['test_config'],
            }
            if self.get_checkpoint(build['url']) is None:
                for stats in self.all_stats(url=build['url']):
                    yield {**stats, **meta}
                self.add_checkpoint(build['url'])

    def run(self):
        """Scan all weekly builds and persist every discovered metric."""
        for build in self.weekly.builds:
            logger.info('Scanning stats from build {}'.format(build))
            for attributes in self.find_metrics(build):
                if attributes is not None:
                    self.store_metric_info(attributes)

    def update_status(self):
        """Push per-component collected-metric counts to the weekly report."""
        for build in self.weekly.builds:
            logger.info('Updating status of build {}'.format(build))
            # FIX: wrap the single positional parameter in a list (see
            # find_snapshots).
            for status in self.cluster.query(
                    self.STATUS_QUERY,
                    QueryOptions(positional_parameters=[build])):
                status = {
                    'build': build,
                    'component': status['component'],
                    'metric_status': {
                        'collected': status['total'],
                    },
                }
                self.weekly.update_status(status)
class JenkinsScanner:
    """Scan Jenkins for finished perf builds and store their metadata in Couchbase.

    Implementation built on the Couchbase Python SDK 3.x ``Cluster`` API.
    A per-job checkpoint (last processed build number) is stored in the
    same bucket, keyed by job name.
    """

    # Target bucket and credentials for the 'jenkins' store.
    COUCHBASE_BUCKET = 'jenkins'
    COUCHBASE_HOST = 'perflab.sc.couchbase.com'
    COUCHBASE_PASSWORD = '******'  # Yay!

    JENKINS_URL = 'http://perf.jenkins.couchbase.com'

    # Pass/fail counts per component for one build ($1 = version).
    STATUS_QUERY = """
        SELECT component,
               COUNT(CASE WHEN (success = true) THEN 1 ELSE NULL END) AS passed,
               COUNT(CASE WHEN (success = false) THEN 1 ELSE NULL END) AS failed
        FROM jenkins
        WHERE version = $1 AND success IS NOT NULL
        GROUP BY component;
    """

    # All stored build records for one version ($1 = version).
    BUILD_QUERY = """
        SELECT component, test_config, `cluster`, url
        FROM jenkins
        WHERE version = $1;
    """

    def __init__(self):
        # Bucket name doubles as the username (RBAC user per bucket).
        pass_auth = PasswordAuthenticator(self.COUCHBASE_BUCKET,
                                          self.COUCHBASE_PASSWORD)
        options = ClusterOptions(authenticator=pass_auth)
        self.cluster = Cluster(connection_string=self.connection_string,
                               options=options)
        self.bucket = self.cluster.bucket(
            self.COUCHBASE_BUCKET).default_collection()
        self.jenkins = jenkins.Jenkins(self.JENKINS_URL)
        self.weekly = Weekly()
        self.jobs = set()  # populated by map_jobs()

    @property
    def connection_string(self) -> str:
        """Connection string for the jenkins cluster.

        NOTE(review): authentication is already handled by the
        PasswordAuthenticator in __init__; the ``?password=`` option here
        looks redundant — confirm before removing it.
        """
        return 'couchbase://{}?password={}'.format(self.COUCHBASE_HOST,
                                                   self.COUCHBASE_PASSWORD)

    def get_checkpoint(self, job_name: str) -> Optional[int]:
        """Return the last processed build number for a job, 0 if none.

        NOTE(review): the broad ``except Exception`` also turns transient
        connectivity errors into checkpoint 0, which would re-scan the
        whole job — consider narrowing to document-not-found.
        """
        try:
            return self.bucket.get(job_name).content
        except Exception as ex:
            logger.info(ex)
            return 0

    def add_checkpoint(self, job_name: str, build_number: int):
        """Persist the highest processed build number for a job."""
        self.bucket.upsert(key=job_name, value=build_number, persist_to=1)
        logger.info('Added checkpoint for {}'.format(job_name))

    def store_build_info(self, attributes: dict):
        """Upsert one build-metadata document keyed by generate_key()."""
        key = self.generate_key(attributes)
        self.bucket.upsert(key=key, value=attributes)
        logger.info('Added: {}'.format(attributes['url']))

    @staticmethod
    def generate_key(attributes: dict) -> str:
        """Build a deterministic document key: cluster_testconfig_version."""
        return '_'.join((attributes['cluster'],
                         attributes['test_config'],
                         attributes['version']))

    def map_jobs(self) -> JobMapping:
        """Read the weekly pipeline files and map component -> job specs.

        Side effect: fills self.jobs with every Jenkins job name seen.
        """
        job_mapping = defaultdict(list)
        for pipeline in glob.glob('tests/pipelines/weekly-*.json'):
            with open(pipeline) as fh:
                for component, jobs in json.load(fh).items():
                    for job in jobs:
                        self.jobs.add(job['job'])
                        job_mapping[component].append(job)
        return job_mapping

    def map_test_configs(self, job_mapping: JobMapping) -> Dict[str, str]:
        """Invert the job mapping: test config file -> base component name."""
        test_configs = {}
        for component, jobs in job_mapping.items():
            for job in jobs:
                test_config = job['test_config']
                # 'kv-windows' and friends collapse to their base component.
                test_configs[test_config] = component.split('-')[0]
        return test_configs

    @staticmethod
    def extract_parameters(actions: List[Dict]) -> Optional[dict]:
        """Pull cluster/test_config/version from the build's parameter action.

        Returns {} for dry runs, and None when the build has no
        ParametersAction at all (callers treat both as falsy).
        """
        for action in actions:
            if action.get('_class') == 'hudson.model.ParametersAction':
                parameters = {}
                for parameter in action['parameters']:
                    parameter_name = parameter['name']
                    if parameter_name == 'dry_run' and parameter['value']:
                        return {}  # Ignore dry runs
                    if parameter_name in ('cluster', 'test_config', 'version'):
                        parameters[parameter_name] = parameter['value']
                return parameters

    @staticmethod
    def merge_attributes(component: str,
                         job: str,
                         build_info: dict,
                         build_parameters: dict) -> dict:
        """Merge Jenkins build info into the extracted parameters (in place)."""
        build_parameters.update({
            'component': component,
            'duration': build_info['duration'],
            'job': job,
            'success': build_info['result'] == 'SUCCESS',
            'timestamp': build_info['timestamp'],
            'url': build_info['url'],
        })
        return build_parameters

    def build_info(self) -> Iterator[Tuple[str, dict]]:
        """Yield (job name, build info) for every finished, unseen build.

        Builds are visited in ascending number order; the checkpoint is
        advanced only past builds whose result is final (not None), so
        in-flight builds are retried on the next scan.
        """
        for job_name in self.jobs:
            checkpoint = self.get_checkpoint(job_name)
            new_checkpoint = checkpoint
            job_info = self.jenkins.get_job_info(job_name,
                                                 fetch_all_builds=True)
            for build in sorted(job_info['builds'], key=lambda b: b['number']):
                build_number = build['number']
                if build_number > checkpoint:
                    # Renamed from 'build_info' to avoid shadowing this method.
                    info = self.jenkins.get_build_info(job_name, build_number)
                    if info['result'] is not None:
                        new_checkpoint = max(new_checkpoint, build_number)
                        yield job_name, info
            self.add_checkpoint(job_name, new_checkpoint)

    def build_ext_info(self) -> Iterator[Tuple[str, dict, dict]]:
        """Yield (job name, build info, parameters), skipping dry runs."""
        for job_name, build_info in self.build_info():
            build_actions = build_info['actions']
            build_parameters = self.extract_parameters(build_actions)
            if build_parameters:
                yield job_name, build_info, build_parameters

    def scan(self):
        """Scan all known jobs and store metadata for recognized builds."""
        jobs = self.map_jobs()
        test_configs = self.map_test_configs(jobs)
        for job_name, build_info, build_parameters in self.build_ext_info():
            test_config = build_parameters['test_config']
            component = test_configs.get(test_config)
            if component is not None:  # ignore builds outside the pipelines
                attributes = self.merge_attributes(component,
                                                   job_name,
                                                   build_info,
                                                   build_parameters)
                self.store_build_info(attributes)

    def update_status(self):
        """Push per-component pass/fail counts to the weekly report."""
        for build in self.weekly.builds:
            logger.info('Updating status of build {}'.format(build))
            # FIX: positional_parameters must be a sequence of parameters,
            # not a bare string (N1QL "args" is a JSON array).
            for status in self.cluster.query(
                    self.STATUS_QUERY,
                    QueryOptions(positional_parameters=[build])):
                status = {
                    'build': build,
                    'component': status['component'],
                    'test_status': {
                        'passed': status['passed'],
                        'failed': status['failed'],
                    },
                }
                self.weekly.update_status(status)

    def find_builds(self, version: str) -> Iterator[dict]:
        """Stream stored build records for one version."""
        # FIX: wrap the single positional parameter in a list (see
        # update_status).
        for build in self.cluster.query(
                self.BUILD_QUERY,
                QueryOptions(positional_parameters=[version])):
            yield build
class JenkinsScanner:
    """Scan Jenkins for finished perf builds and store their metadata in Couchbase.

    Legacy implementation built on the Couchbase Python SDK 2.x ``Bucket``
    API.  A per-job checkpoint (last processed build number) lives in the
    same bucket, keyed by job name.
    """

    # Target bucket and credentials for the 'jenkins' store.
    COUCHBASE_BUCKET = 'jenkins'
    COUCHBASE_HOST = 'perflab.sc.couchbase.com'
    COUCHBASE_PASSWORD = '******'  # Yay!

    JENKINS_URL = 'http://perf.jenkins.couchbase.com'

    # Pass/fail counts per component for one build ($1 = version).
    STATUS_QUERY = """
        SELECT component,
               COUNT(CASE WHEN (success = true) THEN 1 ELSE NULL END) AS passed,
               COUNT(CASE WHEN (success = false) THEN 1 ELSE NULL END) AS failed
        FROM jenkins
        WHERE version = $1 AND success IS NOT NULL
        GROUP BY component;
    """

    # All stored build records for one version ($1 = version).
    BUILD_QUERY = """
        SELECT component, test_config, `cluster`, url
        FROM jenkins
        WHERE version = $1;
    """

    def __init__(self):
        self.bucket = self.new_bucket()
        self.jenkins = jenkins.Jenkins(self.JENKINS_URL)
        self.weekly = Weekly()
        self.jobs = set()  # populated by map_jobs()

    @property
    def connection_string(self) -> str:
        """SDK 2.x style connection string with bucket and password inline."""
        template = 'couchbase://{}/{}?password={}'
        return template.format(self.COUCHBASE_HOST,
                               self.COUCHBASE_BUCKET,
                               self.COUCHBASE_PASSWORD)

    def new_bucket(self) -> Bucket:
        """Open a new SDK 2.x bucket connection to the jenkins store."""
        return Bucket(connection_string=self.connection_string)

    def get_checkpoint(self, job_name: str) -> Optional[int]:
        """Return the last processed build number for a job, 0 if none yet."""
        try:
            result = self.bucket.get(job_name)
        except NotFoundError:
            return 0
        return result.value

    def add_checkpoint(self, job_name: str, build_number: int):
        """Persist the highest processed build number for a job."""
        self.bucket.upsert(key=job_name, value=build_number, persist_to=1)
        logger.info('Added checkpoint for {}'.format(job_name))

    def store_build_info(self, attributes: dict):
        """Upsert one build-metadata document keyed by generate_key()."""
        doc_id = self.generate_key(attributes)
        self.bucket.upsert(key=doc_id, value=attributes)
        logger.info('Added: {}'.format(attributes['url']))

    @staticmethod
    def generate_key(attributes: dict) -> str:
        """Build a deterministic document key: cluster_testconfig_version."""
        parts = (attributes['cluster'],
                 attributes['test_config'],
                 attributes['version'])
        return '_'.join(parts)

    def map_jobs(self) -> JobMapping:
        """Read the weekly pipeline files and map component -> job specs.

        Side effect: fills self.jobs with every Jenkins job name seen.
        """
        mapping = defaultdict(list)
        for pipeline in glob.glob('tests/pipelines/weekly-*.json'):
            with open(pipeline) as fh:
                for component, jobs in json.load(fh).items():
                    for job in jobs:
                        self.jobs.add(job['job'])
                        mapping[component].append(job)
        return mapping

    def map_test_configs(self, job_mapping: JobMapping) -> Dict[str, str]:
        """Invert the job mapping: test config file -> base component name."""
        return {
            job['test_config']: component.split('-')[0]
            for component, jobs in job_mapping.items()
            for job in jobs
        }

    @staticmethod
    def extract_parameters(actions: List[Dict]) -> dict:
        """Pull cluster/test_config/version from the build's parameter action.

        Returns {} for dry runs; returns None when the build carries no
        ParametersAction at all (callers treat both as falsy).
        """
        for action in actions:
            if action.get('_class') != 'hudson.model.ParametersAction':
                continue
            extracted = {}
            for parameter in action['parameters']:
                name = parameter['name']
                if name == 'dry_run' and parameter['value']:
                    return {}  # dry runs carry no useful data
                if name in ('cluster', 'test_config', 'version'):
                    extracted[name] = parameter['value']
            return extracted

    @staticmethod
    def merge_attributes(component: str,
                         job: str,
                         build_info: dict,
                         build_parameters: dict) -> dict:
        """Merge Jenkins build info into the extracted parameters (in place)."""
        extras = {
            'component': component,
            'duration': build_info['duration'],
            'job': job,
            'success': build_info['result'] == 'SUCCESS',
            'timestamp': build_info['timestamp'],
            'url': build_info['url'],
        }
        build_parameters.update(extras)
        return build_parameters

    def build_info(self) -> Iterator[Tuple[str, dict]]:
        """Yield (job name, build info) for every finished, unseen build.

        Builds are visited in ascending number order; the checkpoint only
        advances past builds whose result is final, so in-flight builds
        are retried on the next scan.
        """
        for job_name in self.jobs:
            last_seen = self.get_checkpoint(job_name)
            highest = last_seen
            job_info = self.jenkins.get_job_info(job_name,
                                                 fetch_all_builds=True)
            for build in sorted(job_info['builds'],
                                key=lambda b: b['number']):
                number = build['number']
                if number <= last_seen:
                    continue
                details = self.jenkins.get_build_info(job_name, number)
                if details['result'] is not None:
                    highest = max(highest, number)
                    yield job_name, details
            self.add_checkpoint(job_name, highest)

    def build_ext_info(self) -> Iterator[Tuple[str, dict, dict]]:
        """Yield (job name, build info, parameters), skipping dry runs."""
        for job_name, info in self.build_info():
            params = self.extract_parameters(info['actions'])
            if params:
                yield job_name, info, params

    def scan(self):
        """Scan all known jobs and store metadata for recognized builds."""
        job_mapping = self.map_jobs()
        test_configs = self.map_test_configs(job_mapping)
        for job_name, info, params in self.build_ext_info():
            component = test_configs.get(params['test_config'])
            if component is None:  # build outside the weekly pipelines
                continue
            attributes = self.merge_attributes(component, job_name,
                                               info, params)
            self.store_build_info(attributes)

    def update_status(self):
        """Push per-component pass/fail counts to the weekly report."""
        for build in self.weekly.builds:
            logger.info('Updating status of build {}'.format(build))
            query = N1QLQuery(self.STATUS_QUERY, build)
            for row in self.bucket.n1ql_query(query):
                self.weekly.update_status({
                    'build': build,
                    'component': row['component'],
                    'test_status': {
                        'passed': row['passed'],
                        'failed': row['failed'],
                    },
                })

    def find_builds(self, version: str) -> Iterator[dict]:
        """Stream stored build records for one version."""
        yield from self.bucket.n1ql_query(N1QLQuery(self.BUILD_QUERY, version))