def test_read_channel_config_invalid_path(self):
    """_main must raise when the configured channel_config path is bogus."""
    expected = "Unable to read layout config file at fake_path.yaml"
    self.fake_config.set('ircbot', 'channel_config', 'fake_path.yaml')
    with self.assertRaises(bot.ElasticRecheckException) as ctx:
        bot._main([], er_conf.Config(config_obj=self.fake_config))
    self.assertEqual(str(ctx.exception), expected)
def test_read_channel_config_not_specified(self):
    """_main must raise when channel_config is left unset in the config."""
    expected = "Channel Config must be specified in config file."
    self.fake_config.set('ircbot', 'channel_config', None)
    with self.assertRaises(bot.ElasticRecheckException) as ctx:
        bot._main([], er_conf.Config(config_obj=self.fake_config))
    self.assertEqual(str(ctx.exception), expected)
def main():
    """Entry point: run the bot in the foreground or daemonize it."""
    opts = get_options()
    cfg = er_conf.Config(config_file=opts.conffile)
    if not opts.foreground:
        # Daemonize; the PID lock file guards against concurrent instances.
        lock = pid_file_module.TimeoutPIDLockFile(cfg.pid_fn, 10)
        with daemon.DaemonContext(pidfile=lock):
            _main(opts, cfg)
    else:
        _main(opts, cfg)
def setUp(self):
    """Build the Classifier under test and an anonymous Launchpad session.

    NOTE(review): this performs a real network login to Launchpad
    (production servicer root), so these tests require connectivity.
    """
    super(TestQueries, self).setUp()
    config = er_conf.Config(config_file='elasticRecheck.conf')
    self.classifier = elasticRecheck.Classifier(config.gerrit_query_file,
                                                config=config)
    # Anonymous read-only session; credentials cached under LPCACHEDIR.
    self.lp = launchpad.Launchpad.login_anonymously('grabbing bugs',
                                                    'production',
                                                    LPCACHEDIR)
    self.openstack_projects = (self.get_group_projects('openstack') +
                               self.get_group_projects('oslo') +
                               # Fix for story 2006737 since os-brick is
                               # not in the openstack group in launchpad.
                               ['os-brick'])
def setUp(self):
    """Create a RecheckWatch wired to an in-memory fake configuration.

    Launchpad is mocked out so constructing the watcher performs no
    network access.
    """
    super(TestBot, self).setUp()
    self.fake_config = configparser.ConfigParser({'server_password': None},
                                                 allow_no_value=True)
    _set_fake_config(self.fake_config)
    config = er_conf.Config(config_obj=self.fake_config)
    # Fix: the original used yaml.load(open(...)) which leaked the file
    # handle and relied on the deprecated/unsafe implicit Loader. Use a
    # context manager and safe_load instead.
    with open('recheckwatchbot.yaml') as f:
        self.channel_config = bot.ChannelConfig(yaml.safe_load(f))
    with mock.patch('launchpadlib.launchpad.Launchpad'):
        self.recheck_watch = bot.RecheckWatch(None, self.channel_config,
                                              None, config=config,
                                              commenting=False)
def main():
    """Generate per-group HTML reports of unclassified gate failures."""
    opts = get_options()
    # Fix: configure logging FIRST -- the original called all_fails()
    # (the expensive query phase) before logging.basicConfig, so records
    # emitted during that phase did not use the intended format/level.
    level = logging.DEBUG if opts.verbose else logging.INFO
    logging.basicConfig(format='%(asctime)s [%(name)s] %(levelname)s: '
                               '%(message)s',
                        level=level)
    if level == logging.INFO:
        # NOTE(mtreinish): This logger is overly chatty at INFO, logging
        # every time an HTTP connection is established. This isn't really
        # useful at INFO for this command.
        logging.getLogger(
            'requests.packages.urllib3.connectionpool').setLevel(
            logging.WARN)

    config = er_config.Config(
        config_file=opts.conf,
        uncat_search_size=opts.search_size,
        all_fails_query=opts.all_fails_query,
        excluded_jobs_regex=opts.excluded_jobs_regex,
        included_projects_regex=opts.included_projects_regex)
    classifier = er.Classifier(opts.dir, config=config)

    LOG.info("Starting search for unclassified failures")
    all_gate_fails = all_fails(classifier, config=config)
    for group in all_gate_fails:
        LOG.info("Processing failures for group: %s", group)
        fails = all_gate_fails[group]
        if not fails:
            # It would be pretty spectacular if we had no failures so if
            # we're using the default all failures query, there could be a
            # problem with the query, so log a hint.
            if opts.all_fails_query == er_config.ALL_FAILS_QUERY:
                LOG.warning(
                    'No failures found in group "%s". The default '
                    'ALL_FAILS_QUERY might be broken.', group)
            continue
        data = collect_metrics(classifier, fails, config=config)
        engine = setup_template_engine(opts.templatedir, group=group)
        html = classifying_rate(fails, data, engine, classifier,
                                config.ls_url)
        # Default to the current working directory when -o is not given.
        out_dir = opts.output if opts.output else os.getcwd()
        with open(os.path.join(out_dir, group + '.html'), "w") as f:
            f.write(html)
def __init__(self, ircbot, channel_config, msgs, config=None,
             commenting=True):
    """Watch Gerrit for failed jobs and report recognized bugs.

    :param ircbot: IRC bot used for channel notifications (may be None).
    :param channel_config: ChannelConfig mapping events to channels.
    :param msgs: message templates used when reporting.
    :param config: elastic-recheck Config; a default is built when None.
    :param commenting: whether to leave comments on Gerrit changes.
    """
    super(RecheckWatch, self).__init__()
    self.config = config or er_conf.Config()
    self.ircbot = ircbot
    self.channel_config = channel_config
    self.msgs = msgs
    self.log = logging.getLogger('recheckwatchbot')
    # Fix: read every setting from self.config. The original read from
    # the raw `config` argument, which raises AttributeError whenever the
    # caller relied on the documented config=None default.
    self.username = self.config.gerrit_user
    self.queries = self.config.gerrit_query_file
    self.host = self.config.gerrit_host
    self.connected = False
    self.commenting = commenting
    self.key = self.config.gerrit_host_key
    # Anonymous, read-only Launchpad session for bug metadata lookups.
    self.lp = launchpad.Launchpad.login_anonymously('grabbing bugs',
                                                    'production',
                                                    LPCACHEDIR,
                                                    timeout=60)
def collect_metrics(classifier, fails, config=None):
    """Run every tracked bug query and gather hit/failure metrics.

    :param classifier: Classifier holding the bug queries to execute.
    :param fails: map of all observed failures, used for percentages.
    :param config: optional Config; a default is built when None.
    :returns: dict keyed by bug number with fails/hits/percentages/
              query/failed_jobs entries. Queries that time out are
              skipped (logged) rather than aborting the whole run.
    """
    config = config or er_config.Config()
    data = {}
    for q in classifier.queries:
        try:
            results = classifier.hits_by_query(
                q['query'], size=config.uncat_search_size)
            hits = _status_count(results)
            LOG.debug("Collected metrics for query %s, hits %s",
                      q['query'], hits)
            data[q['bug']] = {
                'fails': _failure_count(hits),
                'hits': hits,
                'percentages': _failure_percentage(results, fails),
                'query': q['query'],
                'failed_jobs': _failed_jobs(results)
            }
        except requests.exceptions.ReadTimeout:
            # Best effort: keep collecting the remaining queries.
            # Fix: message typo ("collection" -> "collect") and lazy
            # %-style args so formatting only happens when emitted.
            LOG.exception("Failed to collect metrics for query %s",
                          q['query'])
    return data
def all_fails(classifier, config=None):
    """Find all the fails in the integrated gate.

    This attempts to find all the build jobs in the integrated gate so we
    can figure out how good we are doing on total classification.

    :param classifier: Classifier used to query recent failures.
    :param config: optional Config; a default is built when None.
    :returns: dict with 'integrated_gate' and 'others' maps keyed by
              "<build_uuid>.<build_name>".
    """
    config = config or er_config.Config()
    integrated_fails = {}
    other_fails = {}
    # Hoisted out of the per-result loop (it is constant for the call).
    # Not perfect, but basically an attempt to show the integrated gate.
    # Would be nice if there was a zuul attr for this in es.
    integrated_gate_projects = [
        'openstack/cinder',
        'openstack/glance',
        'openstack/keystone',
        'openstack/neutron',
        'openstack/nova',
        'openstack/requirements',
        'openstack/tempest',
        'openstack-dev/devstack',
        'openstack-dev/grenade',
        'openstack-infra/devstack-gate',
    ]

    def _fail_entry(result):
        # Strip the per-file suffix so we link to the log directory.
        # Fix: default to the raw URL -- the original left `log` unbound
        # (UnboundLocalError) for any other log layout.
        log = result.log_url
        if 'console.html' in result.log_url:
            log = result.log_url.split('console.html')[0]
        elif 'job-output.txt' in result.log_url:
            log = result.log_url.split('job-output.txt')[0]
        return {
            'log': log,
            'timestamp': dp.parse(result.timestamp),
            'build_uuid': result.build_uuid
        }

    results = classifier.hits_by_query(config.all_fails_query,
                                       size=config.uncat_search_size)
    facets = er_results.FacetSet()
    facets.detect_facets(results, ["build_uuid"])
    for build in facets:
        for result in facets[build]:
            # If the job is on the exclude list, skip
            if re.search(config.excluded_jobs_regex, result.build_name):
                continue
            key = "%s.%s" % (build, result.build_name)
            if result.project in integrated_gate_projects:
                integrated_fails[key] = _fail_entry(result)
            elif re.search(config.included_projects_regex, result.project):
                other_fails[key] = _fail_entry(result)
            # Fix: the original logged len(all_fails), which was always 0
            # here because the combined dict was only built at the end.
            LOG.debug("Found failure: %s build_uuid: %s project %s",
                      len(integrated_fails) + len(other_fails),
                      result.build_uuid, result.project)
    return {'integrated_gate': integrated_fails, 'others': other_fails}
def setup():
    """Initialize module-level state: config, DB engine, classifier, cache.

    Populates the globals config, engine, Session, query_dir, es_url,
    classifier and region from the [default] section of the config file.
    Safe to call repeatedly: the config file is only parsed once.
    """
    global config
    if not config:
        args = parse_command_line_args()
        config = ConfigParser.ConfigParser()
        config.read(args.config_file)
    # Database Configuration
    global engine
    db_uri = _config_get(config.get, 'default', 'db_uri')
    pool_size = _config_get(config.getint, 'default', 'pool_size', 20)
    pool_recycle = _config_get(config.getint, 'default', 'pool_recycle',
                               3600)
    engine = create_engine(db_uri,
                           pool_size=pool_size,
                           pool_recycle=pool_recycle)
    global Session
    Session = sessionmaker(bind=engine)
    # RSS Configuration
    rss_opts['frontend_url'] = _config_get(
        config.get, 'default', 'frontend_url',
        'http://status.openstack.org/openstack-health')
    # Elastic-recheck Configuration
    global query_dir
    query_dir = _config_get(config.get, 'default', 'query_dir', None)
    global es_url
    es_url = _config_get(config.get, 'default', 'es_url', None)
    # Only build a classifier when both a query dir is configured and the
    # elastic-recheck import succeeded (er may be None).
    if query_dir and er:
        elastic_config = er_config.Config(es_url=es_url)
        global classifier
        classifier = er.Classifier(query_dir, config=elastic_config)
    # Cache Configuration
    backend = _config_get(config.get, 'default', 'cache_backend',
                          'dogpile.cache.dbm')
    expire = _config_get(config.getint, 'default', 'cache_expiration',
                         datetime.timedelta(minutes=30))
    cache_file = _config_get(
        config.get, 'default', 'cache_file',
        os.path.join(tempfile.gettempdir(), 'openstack-health.dbm'))
    cache_url = _config_get(config.get, 'default', 'cache_url', None)
    global region
    if backend == 'dogpile.cache.dbm':
        args = {'filename': cache_file}
        # A cache_url means a memcached server coordinates the DBM cache
        # across processes; otherwise use a plain local DBM region.
        if cache_url:
            def _key_generator(namespace, fn, **kw):
                # Keys are "<function name><namespace>" plus the stringified
                # positional args joined with underscores.
                namespace = fn.__name__ + (namespace or '')

                def generate_key(*arg):
                    return namespace + "_".join(
                        str(s).replace(' ', '_') for s in arg)
                return generate_key

            memcache_proxy = distributed_dbm.MemcachedLockedDBMProxy(
                cache_url)
            region = dogpile.cache.make_region(
                async_creation_runner=_periodic_refresh_cache,
                function_key_generator=_key_generator).configure(
                    backend, expiration_time=expire, arguments=args,
                    wrap=[memcache_proxy])
        else:
            region = dogpile.cache.make_region().configure(
                backend, expiration_time=expire, arguments=args)
    else:
        # Non-DBM backends (e.g. memcached) take a distributed lock and,
        # when given, the cache server URL directly.
        args = {'distributed_lock': True}
        if cache_url:
            args['url'] = cache_url
        region = dogpile.cache.make_region(
            async_creation_runner=_periodic_refresh_cache).configure(
                backend, expiration_time=expire, arguments=args)
def main():
    """Build the JSON payload backing the uncategorized-bug graphs.

    Runs every tracked query against elasticsearch, buckets hits per
    STEP-sized time window over the last ``days`` days, and writes the
    resulting JSON to the file given with -o (or stdout).
    """
    parser = argparse.ArgumentParser(description='Generate data for graphs.')
    parser.add_argument(dest='queries', help='path to query file')
    parser.add_argument('-o', dest='output',
                        help='output filename. Omit for stdout')
    parser.add_argument('-q', dest='queue',
                        help='limit results to a build queue regex')
    parser.add_argument('--es-query-suffix',
                        help='further limit results with an '
                             'elastic search query suffix. This will be ANDed '
                             'to all queries. '
                             'For example, to limit all queries to a '
                             'specific branch use: '
                             ' --es-query-suffix "build_branch:\\"stable/'
                             'liberty\\""')
    parser.add_argument('-c', '--conf', help="Elastic Recheck Configuration "
                        "file to use for data_source options such as "
                        "elastic search url, logstash url, and database "
                        "uri.")
    parser.add_argument('-v', dest='verbose',
                        action='store_true', default=False,
                        help='print out details as we go')
    args = parser.parse_args()

    config = er_conf.Config(config_file=args.conf)
    classifier = er.Classifier(args.queries, config=config)
    buglist = []

    # if you don't hate timezones, you don't program enough
    epoch = datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    ts = datetime.utcnow().replace(tzinfo=pytz.utc)
    # rawnow is useful for sending to javascript
    rawnow = int(((ts - epoch).total_seconds()) * 1000)
    # truncate to the current hour before graph bucketing
    ts = datetime(ts.year, ts.month, ts.day, ts.hour).replace(tzinfo=pytz.utc)
    # ms since epoch
    now = int(((ts - epoch).total_seconds()) * 1000)
    # number of days to match to, this should be the same as we are
    # indexing in logstash
    days = 10
    # How far back to start in the graphs
    start = now - (days * 24 * STEP)
    # ER timeframe for search
    timeframe = days * 24 * STEP / 1000

    last_indexed = int(
        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
    behind = now - last_indexed
    # the data we're going to return, including interesting headers
    jsondata = {
        'now': rawnow,
        'last_indexed': last_indexed,
        'behind': behind,
        'buglist': []
    }
    # Get the cluster health for the header
    es = pyelasticsearch.ElasticSearch(config.es_url)
    jsondata['status'] = es.health()['status']

    for query in classifier.queries:
        if args.queue:
            query['query'] += ' AND build_queue:%s' % args.queue
        if args.es_query_suffix:
            query['query'] += ' AND (%s)' % args.es_query_suffix
        if query.get('suppress-graph'):
            continue
        if args.verbose:
            LOG.debug("Starting query for bug %s" % query['bug'])
        logstash_query = qb.encode_logstash_query(query['query'],
                                                  timeframe=timeframe)
        logstash_url = ("%s/#/dashboard/file/logstash.json?%s" %
                        (config.ls_url, logstash_query))
        bug_data = get_launchpad_bug(query['bug'])
        bug = dict(number=query['bug'],
                   query=query['query'],
                   logstash_url=logstash_url,
                   bug_data=bug_data,
                   fails=0,
                   fails24=0,
                   data=[],
                   voting=(False if query.get('allow-nonvoting') else True))
        buglist.append(bug)
        try:
            results = classifier.hits_by_query(query['query'],
                                               args.queue,
                                               size=3000,
                                               days=days)
        except pyelasticsearch.exceptions.InvalidJsonResponseError:
            LOG.exception(
                "Invalid Json while collecting metrics for query %s"
                % query['query'])
            continue
        except requests.exceptions.ReadTimeout:
            LOG.exception("Timeout while collecting metrics for query %s"
                          % query['query'])
            continue
        except pyelasticsearch.exceptions.ElasticHttpError as ex:
            LOG.error('Error from elasticsearch query for bug %s: %s',
                      query['bug'], ex)
            continue

        facets_for_fail = er_results.FacetSet()
        facets_for_fail.detect_facets(results,
                                      ["build_status", "build_uuid"])
        if "FAILURE" in facets_for_fail:
            bug['fails'] = len(facets_for_fail['FAILURE'])

        facets = er_results.FacetSet()
        facets.detect_facets(results,
                             ["build_status", "timestamp", "build_uuid"])
        for status in facets.keys():
            data = []
            for ts in range(start, now, STEP):
                if ts in facets[status]:
                    fails = len(facets[status][ts])
                    data.append([ts, fails])
                    # get the last 24 hr count as well, can't wait to have
                    # the pandas code and able to do it that way
                    if status == "FAILURE" and ts > (now - (24 * STEP)):
                        bug['fails24'] += fails
                else:
                    data.append([ts, 0])
            bug["data"].append(dict(label=status, data=data))

    # the sort order is a little odd, but basically sort by failures in
    # the last 24 hours, then with all failures for ones that we haven't
    # seen in the last 24 hours.
    buglist = sorted(buglist,
                     key=lambda bug: -(bug['fails24'] * 100000 +
                                       bug['fails']))
    jsondata['buglist'] = buglist

    if args.output:
        with open(args.output, 'w') as out:
            out.write(json.dumps(jsondata))
    else:
        # Fix: the original closed `out` unconditionally in a finally
        # block, which closed sys.stdout when -o was omitted and broke
        # any subsequent writes in the same process.
        sys.stdout.write(json.dumps(jsondata))