Example #1
0
class BugzillaCache(object):
  """Cache Bugzilla bug metadata in an ElasticSearch index.

  Bug documents are stored in the 'bzcache' index under the 'bugs' doc
  type.  Each document holds the fields 'bugid', 'status', 'summary' and
  'whiteboard'.  Bugs that are not found in the index are fetched from the
  Bugzilla API and then written to the index.
  """

  def __init__(self, logger=None, es_server=config.DEFAULT_ES_SERVER,
               bugzilla_api_url=config.DEFAULT_BUGZILLA_API_URL):
    """Set up the ES connection wrapper and make sure the index exists.

    logger           -- optional object with an info() method; when it is
                        falsy, messages are printed instead.
    es_server        -- passed through to ESLib.
    bugzilla_api_url -- base URL of the Bugzilla API; trailing slashes
                        are stripped.
    """
    self.bugzilla_api_url = bugzilla_api_url.rstrip('/')
    self.doc_type = 'bugs'
    self.index = 'bzcache'
    self.eslib = ESLib(es_server, self.index)
    self.logger = logger
    self.create_index(self.index)

  def log(self, msg):
    """Send msg to the configured logger, or print it if there is none."""
    if self.logger:
      self.logger.info(msg)
    else:
      # Parenthesised single-argument form behaves the same on Python 2
      # and Python 3.
      print(msg)

  def create_index(self, index):
    """Open the given index, creating it when opening fails."""
    try:
      self.eslib.connection.open_index(index)
    except Exception:
      # Any failure to open is treated as "index does not exist yet".
      self.log('creating bzcache index')
      self.eslib.connection.create_index(index)

  def refresh_index(self):
    """Ask ElasticSearch to refresh this cache's index."""
    self.eslib.connection.refresh(indexes=[self.index])

  def _add_doc(self, doc, id=None):
    """Store doc in the index and return the resulting document '_id'.

    Raises Exception, carrying the JSON-encoded ES response, when the
    response does not report success or lacks an '_id'.
    """
    result = self.eslib.add_doc(doc, id, doc_type=self.doc_type)

    # ElasticSearch v1.x uses 'created', v0.9 uses 'ok'
    created = result.get('created', False) or result.get('ok', False)

    if created and '_id' in result:
      return result['_id']

    raise Exception(json.dumps(result))

  def fetch_json(self, url, params=None, timeout=30):
    """GET url and return the decoded JSON body.

    params and timeout are handed to requests.get().  HTTP error
    responses are turned into exceptions by raise_for_status().
    """
    self.log('Fetching %s with params %s' % (url, params))
    headers = {
      'Accept': 'application/json',
      'User-Agent': 'bzcache',
    }
    response = requests.get(url, params=params, headers=headers,
                            timeout=timeout)
    response.raise_for_status()
    return response.json()

  def fetch_intermittent_bugs(self, offset, limit,
                              keyword='intermittent-failure'):
    """Return one page of bugs carrying the given keyword.

    offset, limit -- paging values sent to the Bugzilla API.
    keyword       -- keyword to search for; defaults to
                     'intermittent-failure', the value this method used
                     before the parameter existed.

    Returns the 'bugs' list from the response, or [] if it is absent.
    """
    url = self.bugzilla_api_url + '/bug'
    params = {
      'keywords': keyword,
      # only look at bugs that have been updated in the last 6 months
      'chfieldfrom': '-6m',
      'include_fields': 'id,summary,status,whiteboard',
      'offset': offset,
      'limit': limit,
    }
    results = self.fetch_json(url, params=params)
    return results.get('bugs', [])

  def index_bugs_by_keyword(self, keyword):
    """Fetch every bug with the given keyword and write it to the index.

    The keyword is forwarded to fetch_intermittent_bugs(); previously it
    was ignored and 'intermittent-failure' was always queried.
    """
    bug_list = []

    offset = 0
    limit = 500

    # Keep querying Bugzilla until there are no more results: a page
    # shorter than the limit is the last one.
    while True:
      bug_results_chunk = self.fetch_intermittent_bugs(offset, limit, keyword)
      bug_list += bug_results_chunk
      if len(bug_results_chunk) < limit:
        break
      offset += limit

    for bug in bug_list:
      self.add_or_update_bug(bug['id'],
                             bug['status'],
                             bug['summary'],
                             # tolerate a missing whiteboard, as get_bugs does
                             bug.get('whiteboard', ''),
                             False)

  def _get_bugzilla_data(self, bugid_array):
    """Fetch the given bug ids from Bugzilla and return the bug dicts.

    Ids are requested in groups of 200.  Each request asks only for the
    ids in its own group.
    """
    # request bugs from Bugzilla in groups of 200
    chunk_size = 200
    # str() so that join() also works when ids are not already strings
    bugids = [str(bugid) for bugid in bugid_array]
    bugs = []

    for start in range(0, len(bugids), chunk_size):
      bugid_chunk = bugids[start:start + chunk_size]
      api_url = (self.bugzilla_api_url + "/bug?id=" + ','.join(bugid_chunk) +
                 "&include_fields=id,summary,status,whiteboard")
      bugs += self.fetch_json(api_url).get('bugs', [])
    return bugs

  def get_bugs(self, bugids):
    """Return a dict of bug info for bugids, filling the cache on misses.

    The index is queried first.  Ids that the index did not return are
    fetched from Bugzilla and stored via add_or_update_bug().  Each value
    is a dict with the keys 'id', 'status', 'summary' and 'whiteboard'.
    Entries found in the index are keyed by the stored 'bugid'; entries
    fetched from Bugzilla are keyed by the 'id' Bugzilla returned.
    """
    bugs = {}
    bugset = set(bugids)

    # request bugs from ES in groups of 250; the chunks are built up front
    # because bugset is modified while the results are processed
    chunk_size = 250
    requested = list(bugset)
    bug_chunks = [requested[i:i + chunk_size]
                  for i in range(0, len(requested), chunk_size)]

    for bug_chunk in bug_chunks:
      data = self.eslib.query({'bugid': tuple(bug_chunk)},
                              doc_type=[self.doc_type])

      for bug in data:
        bugs[bug['bugid']] = {
          'status': bug['status'],
          'id': bug['bugid'],
          'summary': bug['summary'],
          'whiteboard': bug.get('whiteboard', ''),
        }
        # discard() is a no-op when the id is not in the set
        bugset.discard(str(bug['bugid']))

    # whatever is left was not found in the index
    if bugset:
      for bzbug in self._get_bugzilla_data(list(bugset)):
        bug_id = bzbug['id']
        bug_whiteboard = bzbug.get('whiteboard', '')
        bugs[bug_id] = {
          'id': bug_id,
          'status': bzbug['status'],
          'summary': bzbug['summary'],
          'whiteboard': bug_whiteboard,
        }
        self.add_or_update_bug(bug_id,
                               bzbug['status'],
                               bzbug['summary'],
                               bug_whiteboard,
                               False)

    return bugs

  def add_or_update_bug(self, bugid, status, summary, whiteboard, refresh=True):
    """Write one bug to the index, logging instead of raising on failure.

    When refresh is true the index is refreshed before the write.  Any
    exception raised by the refresh or the write is logged and swallowed.
    Always returns True.
    """
    # make sure bugid is a string, for consistency
    bugid = str(bugid)

    date = datetime.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')

    try:

      # refresh the index to make sure it's up-to-date
      if refresh:
        self.refresh_index()

      data = {
        'bugid': bugid,
        'status': status,
        'summary': summary,
        'whiteboard': whiteboard,
      }

      doc_id = self._add_doc(data, bugid)
      self.log("%s - %s added, status: %s, id: %s"
               % (date, bugid, status, doc_id))

    except Exception as inst:
      # Report the bug id: the document id is not available when the
      # failure happened before or inside _add_doc().
      self.log('%s - exception while processing bug %s' % (date, bugid))
      self.log(inst)

    return True