def pullrequest_filepath_exists(self, filepath):
        ''' Check if a file exists on the submitter's branch '''

        # https://github.com/ansible/ansibullbot/issues/406

        # https://developer.github.com/v3/repos/contents/
        #   GET /repos/:owner/:repo/readme
        # "contents_url":
        # "https://api.github.com/repos/ganeshrn/ansible/contents/{+path}",

        # self.pullrequest.head
        #   - ref --> branch name
        #   - repo.full_name

        sha = self.pullrequest.head.sha
        pdata = None
        resp = None
        cachefile = os.path.join(
            self.cachedir,
            u'issues',
            to_text(self.number),
            u'shippable_yml.pickle'
        )

        try:
            if os.path.isfile(cachefile):
                with open(cachefile, 'rb') as f:
                    pdata = pickle_load(f)
        except Exception as e:
            logging.error(u'failed to unpickle %s %s' % (cachefile, to_text(e)))

        if not pdata or pdata[0] != sha:

            if self.pullrequest.head.repo:
                url = self.pullrequest.head.repo.url + u'/contents/' + filepath
                resp = self.pullrequest._requester.requestJson(
                    u"GET",
                    url,
                    input={u'ref': self.pullrequest.head.ref}
                )
            else:
                # https://github.com/ansible/ansible/pull/19891
                # Sometimes the head repo/branch has disappeared
                resp = [None]

            pdata = [sha, resp]
            with open(cachefile, 'wb') as f:
                pickle_dump(pdata, f)

        else:
            resp = pdata[1]

        result = False
        if resp[0]:
            result = True
        return result
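The same existence check can be expressed without PyGithub's private requester by hitting the contents endpoint directly. A minimal sketch, assuming an optional personal access token; the helper name and parameters are illustrative, not part of the original code:

import requests

def filepath_exists_on_head(repo_full_name, ref, filepath, token=None):
    # GET /repos/{owner}/{repo}/contents/{path}?ref={branch}
    url = 'https://api.github.com/repos/%s/contents/%s' % (repo_full_name, filepath)
    headers = {'Authorization': 'token %s' % token} if token else {}
    resp = requests.get(url, params={'ref': ref}, headers=headers)
    # 200 means the file exists on that ref; 404 means it does not
    return resp.status_code == 200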
Example #2
    def load_update_fetch(self, property_name):
        '''Fetch a get() property for an object'''

        edata = None
        events = []
        updated = None
        update = False
        write_cache = False
        self.repo.update()

        pfile = os.path.join(self.cachedir, u'%s.pickle' % property_name)
        pdir = os.path.dirname(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            try:
                with open(pfile, 'rb') as f:
                    edata = pickle_load(f)
            except Exception as e:
                update = True
                write_cache = True

            # check the timestamp on the cache
            if edata:
                updated = edata[0]
                events = edata[1]
                if updated < self.repo.updated_at:
                    update = True
                    write_cache = True

        # pull all events if timestamp is behind or no events cached
        if update or not events:
            write_cache = True
            updated = self.get_current_time()
            try:
                methodToCall = getattr(self.repo, u'get_' + property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb
                    epdb.st()
                else:
                    raise Exception(u'unable to get %s' % property_name)
            events = [x for x in methodToCall()]

        if C.DEFAULT_PICKLE_ISSUES:
            if write_cache or not os.path.isfile(pfile):
                # need to dump the pickle back to disk
                edata = [updated, events]
                with open(pfile, 'wb') as f:
                    pickle_dump(edata, f)

        return events
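The shape above (load a pickle, compare its stored timestamp against repo.updated_at, refetch and rewrite when stale) recurs in most of these examples. A generic sketch of that cache pattern using only the standard library; the fetch callable and cache path are placeholders:

import os
import pickle
from datetime import datetime

def cached_fetch(pfile, remote_updated_at, fetch):
    '''Return cached results unless the remote changed after the cache was written.'''
    cached = None
    if os.path.isfile(pfile):
        try:
            with open(pfile, 'rb') as f:
                cached = pickle.load(f)          # [fetched_at, results]
        except Exception:
            cached = None                        # corrupt cache -> refetch

    if cached and cached[0] >= remote_updated_at:
        return cached[1]

    results = list(fetch())                      # the expensive API call
    with open(pfile, 'wb') as f:
        pickle.dump([datetime.utcnow(), results], f)
    return results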
Example #3
    def load_update_fetch(self, property_name):
        '''Fetch a get() property for an object'''

        edata = None
        events = []
        updated = None
        update = False
        write_cache = False
        self.repo.update()

        pfile = os.path.join(self.cachedir, u'%s.pickle' % property_name)
        pdir = os.path.dirname(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            try:
                with open(pfile, 'rb') as f:
                    edata = pickle_load(f)
            except Exception as e:
                update = True
                write_cache = True

            # check the timestamp on the cache
            if edata:
                updated = edata[0]
                events = edata[1]
                if updated < self.repo.updated_at:
                    update = True
                    write_cache = True

        # pull all events if timestamp is behind or no events cached
        if update or not events:
            write_cache = True
            updated = self.get_current_time()
            try:
                methodToCall = getattr(self.repo, u'get_' + property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb; epdb.st()
                else:
                    raise Exception(u'unable to get %s' % property_name)
            events = [x for x in methodToCall()]

        if write_cache or not os.path.isfile(pfile):
            # need to dump the pickle back to disk
            edata = [updated, events]
            with open(pfile, 'wb') as f:
                pickle_dump(edata, f)

        return events
Example #4
 def load_issue(self, number):
     pfile = os.path.join(self.cachedir, u'issues', to_text(number),
                          u'issue.pickle')
     if os.path.isfile(pfile):
         with open(pfile, 'rb') as f:
             try:
                 issue = pickle_load(f)
             except TypeError:
                 return False
         return issue
     else:
         return False
Example #5
 def load_pullrequest(self, number):
     pfile = os.path.join(self.cachedir, u'issues', to_text(number),
                          u'pullrequest.pickle')
     pdir = os.path.dirname(pfile)
     if not os.path.isdir(pdir):
         os.makedirs(pdir)
     if os.path.isfile(pfile):
         with open(pfile, 'rb') as f:
             issue = pickle_load(f)
         return issue
     else:
         return False
Example #6
    def get_pullrequest_status(self, force_fetch=False):
        fetched = False
        jdata = None
        pdata = None
        # pull out the status url from the raw data
        rd = self.pullrequest_raw_data
        surl = rd[u'statuses_url']

        pfile = os.path.join(self.full_cachedir, u'pr_status.pickle')
        pdir = os.path.dirname(pfile)
        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            logging.info(u'pullrequest_status load pfile')
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)

        if pdata:
            # is the data stale?
            if pdata[0] < self.pullrequest.updated_at or force_fetch:
                logging.info(u'fetching pr status: stale, previous from %s' %
                             pdata[0])
                jdata = self.github.get_request(surl)

                if isinstance(jdata, dict):
                    # https://github.com/ansible/ansibullbot/issues/959
                    logging.error(
                        u'Got the following error while fetching PR status: %s',
                        jdata.get(u'message'))
                    logging.error(jdata)
                    return []

                self.log_ci_status(jdata)
                fetched = True
            else:
                jdata = pdata[1]

        # missing?
        if not jdata:
            logging.info(u'fetching pr status: !data')
            jdata = self.github.get_request(surl)
            # FIXME? should we self.log_ci_status(jdata) here too?
            fetched = True

        if fetched or not os.path.isfile(pfile):
            logging.info(u'writing %s' % pfile)
            pdata = (self.pullrequest.updated_at, jdata)
            with open(pfile, 'wb') as f:
                pickle_dump(pdata, f)

        return jdata
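The isinstance(jdata, dict) check exists because the statuses endpoint normally returns a JSON list, but on errors (for example rate limiting) GitHub returns a JSON object with a message field; see ansible/ansibullbot#959. A hedged sketch of just that guard, where get_request stands in for the bot's GitHub wrapper:

import logging

def fetch_statuses(get_request, statuses_url):
    jdata = get_request(statuses_url)
    if isinstance(jdata, dict):
        # error payload, e.g. {"message": "API rate limit exceeded", ...}
        logging.error(u'status fetch failed: %s', jdata.get(u'message'))
        return []
    return jdata  # normal case: a list of commit status objects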
Example #7
 def load_issue(self, number):
     pfile = os.path.join(
         self.cachedir,
         u'issues',
         to_text(number),
         u'issue.pickle'
     )
     if os.path.isfile(pfile):
         with open(pfile, 'rb') as f:
             issue = pickle_load(f)
         return issue
     else:
         return False
Example #8
 def _load_cache(self):
     if not os.path.isdir(self.cachedir):
         os.makedirs(self.cachedir)
     if not os.path.isfile(self.cachefile):
         logging.info(u'!%s' % self.cachefile)
         return None
     try:
         with open(self.cachefile, 'rb') as f:
             cachedata = pickle_load(f)
     except Exception as e:
         logging.debug(e)
         logging.info(u'%s failed to load' % self.cachefile)
         cachedata = None
     return cachedata
Example #10
 def load_pullrequest(self, number):
     pfile = os.path.join(
         self.cachedir,
         u'issues',
         to_text(number),
         u'pullrequest.pickle'
     )
     pdir = os.path.dirname(pfile)
     if not os.path.isdir(pdir):
         os.makedirs(pdir)
     if os.path.isfile(pfile):
         with open(pfile, 'rb') as f:
             issue = pickle_load(f)
         return issue
     else:
         return False
Example #11
    def _load_cache(self):
        if not os.path.isdir(self.cachedir):
            os.makedirs(self.cachedir)
        if not os.path.isfile(self.cachefile):
            logging.info(u'!%s' % self.cachefile)
            return None
        try:
            with open(self.cachefile, 'rb') as f:
                cachedata = pickle_load(f)
        except Exception as e:
            logging.debug(e)
            logging.info(u'%s failed to load' % self.cachefile)
            cachedata = None

        if cachedata:
            cachedata[u'history'] = self._fix_comments_with_no_body(
                cachedata[u'history'])

        return cachedata
Example #12
    def get_members(self, organization):
        """Get members of an organization

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub login belonging to the organization
        """
        members = []
        update = False
        write_cache = False
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = os.path.join(self.cachedir_base, organization)
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

        cachefile = os.path.join(cachedir, 'members.pickle')

        if os.path.isfile(cachefile):
            with open(cachefile, 'rb') as f:
                mdata = pickle_load(f)
            members = mdata[1]
            if mdata[0] < gh_org.updated_at:
                update = True
                write_cache = True
        else:
            update = True
            write_cache = True

        if update:
            members = gh_org.get_members()
            members = [x.login for x in members]

        # save the data
        if write_cache:
            mdata = [now, members]
            with open(cachefile, 'wb') as f:
                pickle_dump(mdata, f)

        return members
Example #13
    def get_members(self, organization):
        """Get members of an organization

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub login belonging to the organization
        """
        members = []
        update = False
        write_cache = False
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = os.path.join(self.cachedir_base, organization)
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

        cachefile = os.path.join(cachedir, 'members.pickle')

        if os.path.isfile(cachefile):
            with open(cachefile, 'rb') as f:
                mdata = pickle_load(f)
            members = mdata[1]
            if mdata[0] < gh_org.updated_at:
                update = True
                write_cache = True
        else:
            update = True
            write_cache = True

        if update:
            members = gh_org.get_members()
            members = [x.login for x in members]

        # save the data
        if write_cache:
            mdata = [now, members]
            with open(cachefile, 'wb') as f:
                pickle_dump(mdata, f)

        return members
Example #14
    def jobs(self):
        if self._jobs is None:
            if self.build_id:
                if not os.path.isdir(self._cachedir):
                    os.makedirs(self._cachedir)
                cache_file = os.path.join(
                    self._cachedir, u'timeline_%s.pickle' % self.build_id)

                resp = fetch(TIMELINE_URL_FMT % self.build_id)
                if resp is None:
                    data = None
                    if os.path.isfile(cache_file):
                        logging.info(
                            u'timeline was probably removed, load it from cache'
                        )
                        with open(cache_file, 'rb') as f:
                            data = pickle_load(f)
                else:
                    data = resp.json()
                    data = (strip_time_safely(data['lastChangedOn']), data)
                    logging.info(u'writing %s' % cache_file)
                    with open(cache_file, 'wb') as f:
                        pickle_dump(data, f)

                if data is not None:
                    data = data[1]
                    self._jobs = [
                        r for r in data['records'] if r['type'] == 'Job'
                    ]
                    self._updated_at = strip_time_safely(
                        data['lastChangedOn'])  # FIXME
                    self._stages = [
                        r for r in data['records'] if r['type'] == 'Stage'
                    ]  # FIXME
                else:
                    self._jobs = []
                    self._updated_at = strip_time_safely('1970-01-01')
                    self._stages = []
            else:
                self._jobs = []
        return self._jobs
Example #15
    def get_artifact(self, name, url):
        if not os.path.isdir(self._cachedir):
            os.makedirs(self._cachedir)

        data = None
        cache_file = os.path.join(
            self._cachedir,
            u'%s_%s.pickle' % (name.replace(' ', '-'), self.build_id))
        if os.path.isfile(cache_file):
            logging.info(u'loading %s' % cache_file)
            with open(cache_file, 'rb') as f:
                data = pickle_load(f)

        if data is None or (data and data[0] < self.updated_at) or not data[1]:
            if data:
                logging.info(u'fetching artifacts: stale, previous from %s' %
                             data[0])
            else:
                logging.info(u'fetching artifacts: stale, no previous data')

            resp = fetch(url, stream=True)
            if resp is not None:
                with BytesIO() as data:
                    for chunk in resp.iter_content(chunk_size=128):
                        data.write(chunk)
                    artifact_zip = ZipFile(data)

                    artifact_data = []
                    for fn in artifact_zip.namelist():
                        if 'ansible-test-' not in fn:
                            continue
                        with artifact_zip.open(fn) as f:
                            artifact_data.append(json.load(f))

                    data = (self.updated_at, artifact_data)
                    logging.info(u'writing %s' % cache_file)
                    with open(cache_file, 'wb') as f:
                        pickle_dump(data, f)
        if data:
            return data[1]
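The streamed download above is collected into an in-memory buffer so the zip can be opened without touching disk. A standalone sketch of that step, assuming a requests response and that only the embedded ansible-test JSON files are wanted:

import json
from io import BytesIO
from zipfile import ZipFile

def extract_ansible_test_json(resp):
    '''Collect a streamed zip into memory and pull out the ansible-test JSON files.'''
    buf = BytesIO()
    for chunk in resp.iter_content(chunk_size=128):
        buf.write(chunk)
    results = []
    with ZipFile(buf) as zf:
        for name in zf.namelist():
            if 'ansible-test-' not in name:
                continue
            with zf.open(name) as f:
                results.append(json.load(f))
    return results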
Example #16
    def artifacts(self):
        if self._artifacts is None:
            # FIXME deduplicate code
            if not os.path.isdir(self._cachedir):
                os.makedirs(self._cachedir)

            data = None
            cache_file = os.path.join(self._cachedir,
                                      u'artifacts_%s.pickle' % self.build_id)
            if os.path.isfile(cache_file):
                logging.info(u'load artifacts cache')
                with open(cache_file, 'rb') as f:
                    data = pickle_load(f)

            if data is None or (data
                                and data[0] < self.updated_at) or not data[1]:
                if data:
                    logging.info(
                        u'fetching artifacts: stale, previous from %s' %
                        data[0])
                else:
                    logging.info(
                        u'fetching artifacts: stale, no previous data')

                resp = fetch(ARTIFACTS_URL_FMT % self.build_id)
                if resp is not None:
                    data = [
                        a for a in resp.json()['value']
                        if a['name'].startswith('Bot')
                    ]
                    data = (self.updated_at, data)

                    logging.info(u'writing %s' % cache_file)
                    with open(cache_file, 'wb') as f:
                        pickle_dump(data, f)
            if data:
                self._artifacts = data[1]

        return self._artifacts
Example #17
    def load_issues(self, state=u'open', filter=None):
        issues = []
        gfiles = glob.glob(u'%s/issues/*/issue.pickle' % self.cachedir)
        for gf in gfiles:

            if filter:
                gf_parts = gf.split(u'/')
                this_number = gf_parts[-2]
                this_number = int(this_number)
                if this_number not in filter:
                    continue

            logging.debug(u'load %s' % gf)
            issue = None
            try:
                with open(gf, 'rb') as f:
                    issue = pickle_load(f)
            except EOFError as e:
                # this is bad, get rid of it
                logging.error(e)
                os.remove(gf)
            if issue:
                issues.append(issue)
        return issues
Example #18
    def load_issues(self, state=u'open', filter=None):
        issues = []
        gfiles = glob.glob(u'%s/issues/*/issue.pickle' % self.cachedir)
        for gf in gfiles:

            if filter:
                gf_parts = gf.split(u'/')
                this_number = gf_parts[-2]
                this_number = int(this_number)
                if this_number not in filter:
                    continue

            logging.debug(u'load %s' % gf)
            issue = None
            try:
                with open(gf, 'rb') as f:
                    issue = pickle_load(f)
            except EOFError as e:
                # this is bad, get rid of it
                logging.error(e)
                os.remove(gf)
            if issue:
                issues.append(issue)
        return issues
def main():
    pprint(sys.argv)
    dest = sys.argv[1]
    print('dest: %s' % dest)

    # get_valid_labels('ansible/ansible')
    # /home/jtanner/.ansibullbot/cache/ansible/ansible/labels.pickle

    with open(
            os.path.expanduser(
                '~/.ansibullbot/cache/ansible/ansible/labels.pickle'),
            'rb') as f:
        labels = pickle_load(f)
    valid_labels = [x.name for x in labels[1]]

    FILEMAP_FILENAME = 'FILEMAP.json'
    COMPONENTMAP_FILENAME = 'COMPONENTMAP.json'
    FI = FileIndexer(
        checkoutdir=os.path.expanduser(
            '~/.ansibullbot/cache/ansible.files.checkout'),
        cmap=COMPONENTMAP_FILENAME,
    )

    module_cache_file = '/tmp/mi-modules.json'
    if not os.path.isfile(module_cache_file):
        module_maintainers = get_maintainers_mapping()
        MI = ModuleIndexer(maintainers=module_maintainers)
        MI.get_ansible_modules()
        with open(module_cache_file, 'wb') as f:
            f.write(json.dumps(MI.modules, sort_keys=True, indent=2))
        modules = MI.modules
    else:
        with open(module_cache_file, 'rb') as f:
            modules = json.loads(f.read())

    macro_teams = {
        'Qalthos,gundalow,privateip':
        'openswitch',
        'Qalthos,ganeshrn,gundalow,privateip,rcarrillocruz,trishnaguha':
        'networking',
        'GGabriele,jedelman8,mikewiebe,privateip,rahushen,rcarrillocruz,trishnaguha':
        'nxos',
        'emonty,j2sol,juliakreger,rcarrillocruz,shrews,thingee':
        'openstack',
        'chrishoffman,manuel-sousa,romanek-adam':
        'rabbitmq',
        'alikins,barnabycourt,flossware,vritant':
        'rhn',
        'Qalthos,amitsi,gundalow,privateip':
        'netvisor',
        'haroldwongms,nitzmahone,tstringer':
        'azure',
        'dagwieers,jborean93,jhawkesworth':
        'windows',
        'dagwieers,dav1x,jctanner':
        'vmware',
        'isharacomix,jrrivers,privateip':
        'cumulus',
        'chiradeep,giorgos-nikolopoulos':
        'netscaler',
        'ericsysmin,grastogi23,khaltore':
        'avi',
        'ghjm,jlaska,matburt,wwitzel3':
        'tower',
        'hulquest,lmprice,timuster':
        'netapp',
    }

    usermap = {'mpdehaan': False}
    namemap = {'Shrews': 'shrews'}
    exclusions = {
        '*': [
            'chouseknecht', 'Java1Guy', 'franckcuny', 'mhite', 'bennojoy',
            'risaacson', 'whenrik'
        ],
        'network/wakeonlan': ['dagwiers'],
    }

    removed = get_removed_maintainers()

    teams = {}
    data = {}
    data['files'] = {}

    # merge the moduleindexer data
    for k, v in modules.items():
        fp = v.get('filepath')
        if not fp or not fp.startswith('lib/ansible'):
            continue
        data['files'][k] = {}
        if v['_maintainers']:
            data['files'][k]['maintainers'] = []
            data['files'][k]['maintainers'] = [x for x in v['_maintainers']]
        if v['authors']:
            if 'maintainers' not in data['files'][k]:
                data['files'][k]['maintainers'] = []
            data['files'][k]['maintainers'] += v['authors']
            data['files'][k]['maintainers'] = sorted(
                set(data['files'][k]['maintainers']))

        # validate each maintainer exists
        if 'maintainers' in data['files'][k]:
            maintainers = []
            for x in data['files'][k]['maintainers']:

                if x in exclusions['*']:
                    continue

                if x in namemap:
                    x = namemap[x]
                if x in usermap:
                    if usermap[x]:
                        maintainers.append(x)
                else:
                    if x == 'ansible':
                        usermap['ansible'] = True
                        maintainers.append(x)
                        continue
                    res = requests.get('https://github.com/%s' % x)
                    if res.status_code == 200:
                        usermap[x] = True
                        maintainers.append(x)
                    else:
                        usermap[x] = False
            data['files'][k]['maintainers'] = sorted(set(maintainers))
            if not data['files'][k]['maintainers']:
                data['files'][k].pop('maintainers', None)

    # merge the removed people
    for k, v in removed.items():
        k = os.path.join('lib/ansible/modules', k)
        v = sorted(set(v))
        if k in data['files']:
            if 'maintainers' in data['files'][k]:
                for vx in v:
                    if vx in data['files'][k]['maintainers']:
                        data['files'][k]['maintainers'].remove(vx)
                        if 'ignored' not in data['files'][k]:
                            data['files'][k]['ignored'] = []
                        data['files'][k]['ignored'].append(vx)
                if not data['files'][k]['maintainers']:
                    data['files'][k].pop('maintainers', None)
                    #import epdb; epdb.st()

    # merge the fileindexer data
    for k in FI.files:
        #if 'contrib/inventory' in k:
        #    import epdb; epdb.st()
        #print(k)
        try:
            klabels = FI.get_component_labels(valid_labels, [k])
            if klabels:
                klabels = [x for x in klabels if not x.startswith('c:')]
                if not klabels:
                    continue
                if k not in data['files']:
                    data['files'][k] = {}
                if 'labels' not in data['files'][k]:
                    data['files'][k]['labels'] = []
                data['files'][k]['labels'] += klabels
        except UnicodeDecodeError:
            continue

        keywords = FI.get_keywords_for_file(k)
        if keywords:
            if k not in data['files']:
                data['files'][k] = {}
            if 'keywords' not in data['files'][k]:
                data['files'][k]['keywords'] = []
            data['files'][k]['keywords'] += keywords
            #import epdb; epdb.st()
    '''
    # calculate all teams
    for k,v in data['files'].items():
        if not v.get('maintainers'):
            continue
        maintainers = sorted(set(v['maintainers']))
        key = ','.join(maintainers)
        if key not in teams:
            teams[key] = []
        teams[key].append(k)

    # rank and show
    steams = sorted(teams, key=len, reverse=True)
    for x in steams[0:15]:
        if x in macro_teams:
            continue
        pprint(teams[x])
        print(x)
        import epdb; epdb.st()
    import epdb; epdb.st()
    '''

    for k, v in data['files'].items():
        if not v.get('maintainers'):
            continue
        maintainers = v.get('maintainers')
        for idx, x in enumerate(maintainers):
            if x == 'ansible':
                maintainers[idx] = '$team_ansible'
        if maintainers == ['$team_ansible']:
            data['files'][k]['maintainers'] = ' '.join(maintainers)
            continue
        if len(maintainers) == 1:
            data['files'][k]['maintainers'] = ' '.join(maintainers)
            continue
        mkey = ','.join(sorted(set(maintainers)))
        if mkey in macro_teams:
            maintainers = ['$team_%s' % macro_teams[mkey]]
            data['files'][k]['maintainers'] = ' '.join(maintainers)
        else:
            # partial matching
            match = None
            subnames = sorted(set(maintainers))
            for sn in subnames:
                filtered = [x for x in subnames if x != sn]
                fkey = ','.join(filtered)
                if fkey in macro_teams:
                    match = fkey
            if match:
                to_clear = match.split(',')
                maintainers = [x for x in maintainers if x not in to_clear]
                data['files'][k]['maintainers'] = ' '.join(maintainers)

    # fix deprecations
    safe_names = [x for x in FI.files if all(c in string.printable for c in x)]
    remove = []
    for k, v in data['files'].items():
        maintainers = v.get('maintainers')
        if maintainers:
            if 'DEPRECATED' in data['files'][k]['maintainers']:
                data['files'][k].pop('maintainers', None)
                data['files'][k]['deprecated'] = True
        bn = os.path.basename(k)
        if bn.startswith('_') and bn != '__init__.py' and '/modules/' in k:
            '''
            data['files'][k]['deprecated'] = True
            if 'maintainers' in data['files'][k]:
                data['files'][k].pop('maintainers', None)
            '''
            remove.append(k)

        # get rid of files no longer in the repo
        if k not in safe_names:
            remove.append(k)

    for x in remove:
        data['files'].pop(x, None)

    # remove any keys where maintainers == authors
    remove = []
    for k, v in data['files'].items():
        if v.keys() != ['maintainers']:
            continue
        if v['maintainers'] != modules[k]['authors']:
            continue
        remove.append(k)
    for x in remove:
        data['files'].pop(x, None)

    #####################################
    # add special notifies
    #####################################
    data['files']['lib/ansible/modules/cloud/amazon/'] = {
        'notify': ['willthames']
    }

    #####################################
    # reduce to namespace maintainers
    #####################################
    groups = {}
    for k, v in data['files'].items():
        dn = os.path.dirname(k)
        if dn not in groups:
            groups[dn] = {'matches': [], 'values': []}
        groups[dn]['matches'].append(k)
        if v not in groups[dn]['values']:
            groups[dn]['values'].append(v)
    for k, v in groups.items():
        if not len(v['values']) == 1:
            continue
        if len(v['matches']) == 1:
            continue
        #print(k)
        #pprint(v)

        newk = k + '/'
        data['files'][newk] = v['values'][0]
        for pf in v['matches']:
            data['files'].pop(pf, None)

        if newk in removed:
            import epdb
            epdb.st()

    #####################################
    # make a sorted dict
    #####################################

    files = data['files']
    data['files'] = OrderedDict()
    fkeys = sorted(files.keys())
    fkeys = [x.replace('lib/ansible/modules', '$modules') for x in fkeys]
    fkeys = sorted(set(fkeys))
    for fkey in fkeys:
        if fkey.startswith('$modules'):
            mkey = fkey.replace('$modules', 'lib/ansible/modules')
            data['files'][fkey] = files[mkey]
        else:
            data['files'][fkey] = files[fkey]

    data['macros'] = OrderedDict()
    data['macros']['modules'] = 'lib/ansible/modules'
    macro_items = macro_teams.items()
    macro_items = [[x[1], x[0]] for x in macro_items]
    macro_dict = {}
    for x in macro_items:
        macro_dict[x[0]] = x[1]

    data['macros']['team_ansible'] = []
    keys = macro_dict.keys()
    for k in sorted(keys):
        team = macro_dict[k]
        team = team.split(',')
        if len(team) < 10:
            team = " ".join(team)
        data['macros']['team_%s' % k] = team

    # if maintainers is the only subkey, make the primary value a string
    for k, v in data['files'].items():
        keys = v.keys()
        if keys == ['maintainers']:
            if isinstance(v['maintainers'], list):
                data['files'][k] = " ".join(v['maintainers'])
            else:
                data['files'][k] = v['maintainers']
        for xk in ['ignored', 'notified', 'maintainers']:
            if xk in data['files'][k]:
                if not isinstance(data['files'][k][xk], (str, unicode)):
                    data['files'][k][xk] = " ".join(data['files'][k][xk])

    # write it once with ryaml to make it ordered
    ryaml = rYAML()
    (fo, fn) = tempfile.mkstemp()
    with open(fn, 'wb') as f:
        ryaml.dump(data, f)

    # read it back in
    with open(fn, 'rb') as f:
        ylines = f.readlines()

    phase = None
    for idx, x in enumerate(ylines):
        x = x.rstrip()
        x = x.replace('!!omap', '')
        if x.endswith(' {}'):
            x = x.replace(' {}', '')
        if x.startswith('-'):
            x = x.replace('-', ' ', 1)
        ylines[idx] = x

        if x.startswith(' ') and ':' not in x and '-' not in x:
            ylines[idx - 1] += ' ' + x.strip()
            ylines[idx] = ''

    ylines = [x for x in ylines if x.strip()]
    ylines = [HEADER] + ylines

    with open(dest, 'wb') as f:
        f.write('\n'.join(ylines))
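The macro-team reduction earlier in main() is its least obvious step: an exact match on the comma-joined, sorted maintainer list collapses to a single $team macro, while the partial match (team plus one extra name) drops the matched team members and keeps only the leftovers. A sketch of that logic pulled out as a helper, mirroring the script's behaviour but not part of the original:

def reduce_maintainers(maintainers, macro_teams):
    names = sorted(set(maintainers))
    key = ','.join(names)
    if key in macro_teams:
        # exact match: the whole list collapses to a single macro
        return ['$team_%s' % macro_teams[key]]
    # partial match: if all-but-one of the names form a known team,
    # drop those names and keep only the leftover maintainer(s)
    for extra in names:
        filtered = [x for x in names if x != extra]
        if ','.join(filtered) in macro_teams:
            return [x for x in maintainers if x not in filtered]
    return maintainers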
Example #20
    def load_update_fetch(self, property_name, obj=None, force=False):
        '''Fetch a property for an issue object'''

        # A pygithub issue object has methods such as ...
        #   - get_events()
        #   - get_comments()
        # Those methods return a list with no update() property,
        # so we can't take advantage of the caching scheme used
        # for the issue itself. Instead this function calls
        # those methods by their given name and writes the data
        # to a pickle file with a timestamp for the fetch time.
        # Upon later loading of the pickle, the timestamp is
        # compared to the issue's updated_at timestamp and if the
        # pickle data is behind, the process will be repeated.

        edata = None
        events = []
        updated = None
        update = False
        write_cache = False

        pfile = os.path.join(self.full_cachedir, u'%s.pickle' % property_name)
        pdir = os.path.dirname(pfile)
        logging.debug(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            try:
                with open(pfile, 'rb') as f:
                    edata = pickle_load(f)
            except Exception as e:
                update = True
                write_cache = True

        # check the timestamp on the cache
        if edata:
            updated = edata[0]
            events = edata[1]
            if updated < self.instance.updated_at:
                update = True
                write_cache = True

        baseobj = None
        if obj:
            if obj == u'issue':
                baseobj = self.instance
            elif obj == u'pullrequest':
                baseobj = self.pullrequest
        else:
            if hasattr(self.instance, u'get_' + property_name):
                baseobj = self.instance
            else:
                if self.pullrequest:
                    if hasattr(self.pullrequest, u'get_' + property_name):
                        baseobj = self.pullrequest

        if not baseobj:
            logging.error(
                u'%s was not a property for the issue or the pullrequest' %
                property_name)
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb
                epdb.st()
            else:
                raise Exception(u'property error')

        # pull all events if timestamp is behind or no events cached
        if update or not events or force:
            write_cache = True
            updated = datetime.datetime.utcnow()

            if not hasattr(baseobj, u'get_' + property_name) \
                    and hasattr(baseobj, property_name):
                # non-callable property (plain attribute)
                try:
                    methodToCall = getattr(baseobj, property_name)
                except Exception as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb
                        epdb.st()
                    else:
                        raise Exception(to_text(e))
                events = methodToCall
            else:
                # callable properties
                try:
                    methodToCall = getattr(baseobj, u'get_' + property_name)
                except Exception as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb
                        epdb.st()
                    else:
                        raise Exception(to_text(e))
                events = [x for x in methodToCall()]

        if C.DEFAULT_PICKLE_ISSUES:
            if write_cache or not os.path.isfile(pfile) or force:
                # need to dump the pickle back to disk
                edata = [updated, events]
                with open(pfile, 'wb') as f:
                    pickle_dump(edata, f)

        return events
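The comment block above describes the core trick: properties that PyGithub exposes as get_<name>() accessors are resolved dynamically and materialised, while plain attributes are read directly. A minimal sketch of just the dispatch step; the function name is an assumption, baseobj is any PyGithub issue or pull request object:

def resolve_property(baseobj, property_name):
    '''Resolve either a plain attribute or a get_<name>() accessor on a PyGithub object.'''
    getter = u'get_' + property_name
    if hasattr(baseobj, getter):
        # callable accessor, e.g. issue.get_comments(); materialise the paginated list
        return [x for x in getattr(baseobj, getter)()]
    # plain attribute, e.g. issue.labels
    return getattr(baseobj, property_name)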
    def get_pullrequest_status(self, force_fetch=False):
        def sort_unique_statuses(statuses):
            '''reduce redundant statuses to the final run for each id'''
            result = []
            groups = []
            thisgroup = []
            for idx, x in enumerate(statuses):
                if not thisgroup:
                    thisgroup.append(x)
                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)
                    continue
                else:
                    if thisgroup[-1][u'target_url'] == x[u'target_url']:
                        thisgroup.append(x)
                    else:
                        groups.append(thisgroup)
                        thisgroup = []
                        thisgroup.append(x)

                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)

            for group in groups:
                group.sort(key=operator.itemgetter(u'updated_at'))
                result.append(group[-1])

            return result

        fetched = False
        jdata = None
        pdata = None
        # pull out the status url from the raw data
        rd = self.pullrequest_raw_data
        surl = rd[u'statuses_url']

        pfile = os.path.join(self.cachedir, u'issues', to_text(self.number),
                             u'pr_status.pickle')
        pdir = os.path.dirname(pfile)
        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            logging.info(u'pullrequest_status load pfile')
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)

        if pdata:
            # is the data stale?
            if pdata[0] < self.pullrequest.updated_at or force_fetch:
                logging.info(u'fetching pr status: stale, previous from %s' %
                             pdata[0])
                jdata = self._fetch_api_url(surl)
                self.log_ci_status(jdata)
                fetched = True
            else:
                jdata = pdata[1]

        # missing?
        if not jdata:
            logging.info(u'fetching pr status: !data')
            jdata = self._fetch_api_url(surl)
            fetched = True

        if fetched or not os.path.isfile(pfile):
            logging.info(u'writing %s' % pfile)
            pdata = (self.pullrequest.updated_at, jdata)
            with open(pfile, 'wb') as f:
                pickle_dump(pdata, f)

        # remove intermediate duplicates
        #jdata = sort_unique_statuses(jdata)

        return jdata
Example #22
    def _get_module_blames(self):
        ''' Scrape the blame page for each module and store it '''

        keys = sorted(self.modules.keys())

        # scrape the data
        for k in keys:

            cpath = os.path.join(self.gitrepo.checkoutdir, k)
            if not os.path.isfile(cpath):
                self.committers[k] = {}
                continue

            ghash = self.last_commit_for_file(k)
            pfile = os.path.join(
                self.scraper_cache,
                k.replace(u'/', u'_') + u'.blame.pickle'
            )
            sargs = [u'ansible', u'ansible', u'devel', k]

            refresh = False
            if not os.path.isfile(pfile):
                refresh = True
            else:
                logging.debug(u'load {}'.format(pfile))
                with open(pfile, 'rb') as f:
                    pdata = pickle_load(f)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb; epdb.st()
                if pdata[0] == ghash:
                    self.committers[k] = pdata[1]
                    if len(pdata) == 3:
                        # use emailmap if available
                        emailmap = pdata[2]
                    else:
                        emailmap = {}
                else:
                    refresh = True

            if refresh:
                if self.gqlc:
                    logging.debug(u'graphql blame usernames {}'.format(pfile))
                    uns, emailmap = self.gqlc.get_usernames_from_filename_blame(*sargs)
                else:
                    emailmap = {}  # scraping: emails not available
                    logging.debug(u'www blame usernames {}'.format(pfile))
                    uns = self.gws.get_usernames_from_filename_blame(*sargs)
                self.committers[k] = uns
                with open(pfile, 'wb') as f:
                    pickle_dump((ghash, uns, emailmap), f)

            for email, github_id in emailmap.items():
                if email not in self.emails_cache:
                    self.emails_cache[email] = github_id

        # add scraped logins to the map
        for k in keys:
            for idx, x in enumerate(self.commits[k]):
                if x[u'email'] in [u'@']:
                    continue
                if x[u'email'] not in self.emails_cache:
                    self.emails_cache[x[u'email']] = None
                if x[u'login']:
                    self.emails_cache[x[u'email']] = x[u'login']
                    continue

                xhash = x[u'hash']
                for ck, cv in six.iteritems(self.committers[k]):
                    if xhash in cv:
                        self.emails_cache[x[u'email']] = ck
                        break

        # fill in what we can ...
        for k in keys:
            for idx, x in enumerate(self.commits[k]):
                if not x[u'login']:
                    if x[u'email'] in [u'@']:
                        continue
                    if self.emails_cache[x[u'email']]:
                        login = self.emails_cache[x[u'email']]
                        xhash = x[u'hash']
                        self.commits[k][idx][u'login'] = login
                        if login not in self.committers[k]:
                            self.committers[k][login] = []
                        if xhash not in self.committers[k][login]:
                            self.committers[k][login].append(xhash)
Example #23
    def _get_module_blames(self):
        ''' Scrape the blame page for each module and store it '''

        keys = sorted(self.modules.keys())

        # scrape the data
        for k in keys:

            cpath = os.path.join(self.gitrepo.checkoutdir, k)
            if not os.path.isfile(cpath):
                self.committers[k] = {}
                continue

            ghash = self.last_commit_for_file(k)
            pfile = os.path.join(self.scraper_cache,
                                 k.replace(u'/', u'_') + u'.blame.pickle')
            sargs = [u'ansible', u'ansible', u'devel', k]

            refresh = False
            if not os.path.isfile(pfile):
                refresh = True
            else:
                logging.debug(u'load {}'.format(pfile))
                with open(pfile, 'rb') as f:
                    pdata = pickle_load(f)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb
                    epdb.st()
                if pdata[0] == ghash:
                    self.committers[k] = pdata[1]
                    if len(pdata) == 3:
                        # use emailmap if available
                        emailmap = pdata[2]
                    else:
                        emailmap = {}
                else:
                    refresh = True

            if refresh:
                if self.gqlc:
                    logging.debug(u'graphql blame usernames {}'.format(pfile))
                    uns, emailmap = self.gqlc.get_usernames_from_filename_blame(
                        *sargs)
                else:
                    emailmap = {}  # scraping: emails not available
                    logging.debug(u'www blame usernames {}'.format(pfile))
                    uns = self.gws.get_usernames_from_filename_blame(*sargs)
                self.committers[k] = uns
                with open(pfile, 'wb') as f:
                    pickle_dump((ghash, uns, emailmap), f)

            for email, github_id in emailmap.items():
                if email not in self.emails_cache:
                    self.emails_cache[email] = github_id

        # add scraped logins to the map
        for k in keys:
            for idx, x in enumerate(self.commits[k]):
                if x[u'email'] in [u'@']:
                    continue
                if x[u'email'] not in self.emails_cache:
                    self.emails_cache[x[u'email']] = None
                if x[u'login']:
                    self.emails_cache[x[u'email']] = x[u'login']
                    continue

                xhash = x[u'hash']
                for ck, cv in six.iteritems(self.committers[k]):
                    if xhash in cv:
                        self.emails_cache[x[u'email']] = ck
                        break

        # fill in what we can ...
        for k in keys:
            for idx, x in enumerate(self.commits[k]):
                if not x[u'login']:
                    if x[u'email'] in [u'@']:
                        continue
                    if self.emails_cache[x[u'email']]:
                        login = self.emails_cache[x[u'email']]
                        xhash = x[u'hash']
                        self.commits[k][idx][u'login'] = login
                        if login not in self.committers[k]:
                            self.committers[k][login] = []
                        if xhash not in self.committers[k][login]:
                            self.committers[k][login].append(xhash)
Example #24
    def get_module_commits(self):
        keys = self.modules.keys()
        keys = sorted(keys)
        for k in keys:
            self.commits[k] = []
            cpath = os.path.join(self.gitrepo.checkoutdir, k)
            if not os.path.isfile(cpath):
                continue

            mtime = os.path.getmtime(cpath)
            refresh = False
            pfile = os.path.join(self.scraper_cache,
                                 k.replace(u'/', u'_') + u'.commits.pickle')

            if not os.path.isfile(pfile):
                refresh = True
            else:
                pickle_kwargs = {'encoding': 'bytes'} if six.PY3 else {}
                print(pfile)
                with open(pfile, 'rb') as f:
                    pdata = pickle_load(f, **pickle_kwargs)
                if pdata[0] == mtime:
                    self.commits[k] = pdata[1]
                else:
                    refresh = True

            if refresh:
                logging.info(u'refresh commit cache for %s' % k)
                cmd = u'cd %s; git log --follow %s' % (
                    self.gitrepo.checkoutdir, k)
                (rc, so, se) = run_command(cmd)
                for line in to_text(so).split(u'\n'):
                    if line.startswith(u'commit '):
                        commit = {
                            u'name': None,
                            u'email': None,
                            u'login': None,
                            u'hash': line.split()[-1],
                            u'date': None
                        }

                    # Author: Matt Clay <*****@*****.**>
                    if line.startswith(u'Author: '):
                        line = line.replace(u'Author: ', u'')
                        line = line.replace(u'<', u'')
                        line = line.replace(u'>', u'')
                        lparts = line.split()

                        if u'@' in lparts[-1]:
                            commit[u'email'] = lparts[-1]
                            commit[u'name'] = u' '.join(lparts[:-1])
                        else:
                            pass

                        if commit[u'email'] and \
                                u'noreply.github.com' in commit[u'email']:
                            commit[u'login'] = commit[u'email'].split(u'@')[0]

                    # Date:   Sat Jan 28 23:28:53 2017 -0800
                    if line.startswith(u'Date:'):
                        dstr = line.split(u':', 1)[1].strip()
                        dstr = u' '.join(dstr.split(u' ')[:-1])
                        ds = datetime.datetime.strptime(
                            to_text(dstr), u'%a %b %d %H:%M:%S %Y')
                        commit[u'date'] = ds
                        self.commits[k].append(commit)

                with open(pfile, 'wb') as f:
                    pickle_dump((mtime, self.commits[k]), f)
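Parsing the default git log layout line by line, as above, relies on the "commit", "Author:" and "Date:" prefixes and their ordering. A hedged alternative sketch that keeps the same fields but asks git for a fixed, tab-separated format (assumes git >= 2.6 for --date=format; not how the bot itself does it):

import datetime
import subprocess

def commits_for_file(checkoutdir, path):
    # hash, author name, author email, author date, separated by tabs
    cmd = ['git', 'log', '--follow',
           '--pretty=format:%H\t%an\t%ae\t%ad',
           '--date=format:%a %b %d %H:%M:%S %Y', '--', path]
    out = subprocess.check_output(cmd, cwd=checkoutdir).decode('utf-8', 'replace')
    commits = []
    for line in out.splitlines():
        chash, name, email, dstr = line.split('\t')
        commits.append({
            u'hash': chash,
            u'name': name,
            u'email': email,
            u'login': email.split('@')[0] if 'noreply.github.com' in email else None,
            u'date': datetime.datetime.strptime(dstr, '%a %b %d %H:%M:%S %Y'),
        })
    return commits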
Example #25
    def load_update_fetch(self, property_name, obj=None):
        '''Fetch a property for an issue object'''

        # A pygithub issue object has methods such as ...
        #   - get_events()
        #   - get_comments()
        # Those methods return a list with no update() property,
        # so we can't take advantage of the caching scheme used
        # for the issue itself. Instead this function calls
        # those methods by their given name and writes the data
        # to a pickle file with a timestamp for the fetch time.
        # Upon later loading of the pickle, the timestamp is
        # compared to the issue's updated_at timestamp and if the
        # pickle data is behind, the process will be repeated.

        edata = None
        events = []
        updated = None
        update = False
        write_cache = False

        pfile = os.path.join(
            self.cachedir,
            u'issues',
            to_text(self.instance.number),
            u'%s.pickle' % property_name
        )
        pdir = os.path.dirname(pfile)
        logging.debug(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            try:
                with open(pfile, 'rb') as f:
                    edata = pickle_load(f)
            except Exception as e:
                update = True
                write_cache = True

        # check the timestamp on the cache
        if edata:
            updated = edata[0]
            events = edata[1]
            if updated < self.instance.updated_at:
                update = True
                write_cache = True

        baseobj = None
        if obj:
            if obj == u'issue':
                baseobj = self.instance
            elif obj == u'pullrequest':
                baseobj = self.pullrequest
        else:
            if hasattr(self.instance, u'get_' + property_name):
                baseobj = self.instance
            else:
                if self.pullrequest:
                    if hasattr(self.pullrequest, u'get_' + property_name):
                        baseobj = self.pullrequest

        if not baseobj:
            logging.error(
                u'%s was not a property for the issue or the pullrequest'
                % property_name
            )
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb; epdb.st()
            else:
                raise Exception(u'property error')

        # pull all events if timestamp is behind or no events cached
        if update or not events:
            write_cache = True
            updated = self.get_current_time()

            if not hasattr(baseobj, u'get_' + property_name) \
                    and hasattr(baseobj, property_name):
                # non-callable property (plain attribute)
                try:
                    methodToCall = getattr(baseobj, property_name)
                except Exception as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb; epdb.st()
                    else:
                        raise Exception(to_text(e))
                events = methodToCall
            else:
                # callable properties
                try:
                    methodToCall = getattr(baseobj, u'get_' + property_name)
                except Exception as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb; epdb.st()
                    else:
                        raise Exception(to_text(e))
                events = [x for x in methodToCall()]

        if write_cache or not os.path.isfile(pfile):
            # need to dump the pickle back to disk
            edata = [updated, events]
            with open(pfile, 'wb') as f:
                pickle_dump(edata, f)

        return events
Example #26
    def get_pullrequest_status(self, force_fetch=False):

        def sort_unique_statuses(statuses):
            '''reduce redundant statuses to the final run for each id'''
            result = []
            groups = []
            thisgroup = []
            for idx, x in enumerate(statuses):
                if not thisgroup:
                    thisgroup.append(x)
                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)
                    continue
                else:
                    if thisgroup[-1][u'target_url'] == x[u'target_url']:
                        thisgroup.append(x)
                    else:
                        groups.append(thisgroup)
                        thisgroup = []
                        thisgroup.append(x)

                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)

            for group in groups:
                group.sort(key=operator.itemgetter(u'updated_at'))
                result.append(group[-1])

            return result

        fetched = False
        jdata = None
        pdata = None
        # pull out the status url from the raw data
        rd = self.pullrequest_raw_data
        surl = rd[u'statuses_url']

        pfile = os.path.join(
            self.cachedir,
            u'issues',
            to_text(self.number),
            u'pr_status.pickle'
        )
        pdir = os.path.dirname(pfile)
        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            logging.info(u'pullrequest_status load pfile')
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)

        if pdata:
            # is the data stale?
            if pdata[0] < self.pullrequest.updated_at or force_fetch:
                logging.info(u'fetching pr status: stale, previous from %s' % pdata[0])
                jdata = self._fetch_api_url(surl)
                self.log_ci_status(jdata)
                fetched = True
            else:
                jdata = pdata[1]

        # missing?
        if not jdata:
            logging.info(u'fetching pr status: !data')
            jdata = self._fetch_api_url(surl)
            fetched = True

        if fetched or not os.path.isfile(pfile):
            logging.info(u'writing %s' % pfile)
            pdata = (self.pullrequest.updated_at, jdata)
            with open(pfile, 'wb') as f:
                pickle_dump(pdata, f)

        # remove intermediate duplicates
        #jdata = sort_unique_statuses(jdata)

        return jdata
Example #27
    def get_module_commits(self):
        keys = self.modules.keys()
        keys = sorted(keys)
        for k in keys:
            self.commits[k] = []
            cpath = os.path.join(self.gitrepo.checkoutdir, k)
            if not os.path.isfile(cpath):
                continue

            mtime = os.path.getmtime(cpath)
            refresh = False
            pfile = os.path.join(
                self.scraper_cache,
                k.replace(u'/', u'_') + u'.commits.pickle'
            )

            if not os.path.isfile(pfile):
                refresh = True
            else:
                pickle_kwargs = {'encoding': 'bytes'} if six.PY3 else {}
                print(pfile)
                with open(pfile, 'rb') as f:
                    pdata = pickle_load(f, **pickle_kwargs)
                if pdata[0] == mtime:
                    self.commits[k] = pdata[1]
                else:
                    refresh = True

            if refresh:
                logging.info(u'refresh commit cache for %s' % k)
                cmd = u'cd %s; git log --follow %s' % (self.gitrepo.checkoutdir, k)
                (rc, so, se) = run_command(cmd)
                for line in to_text(so).split(u'\n'):
                    if line.startswith(u'commit '):
                        commit = {
                            u'name': None,
                            u'email': None,
                            u'login': None,
                            u'hash': line.split()[-1],
                            u'date': None
                        }

                    # Author: Matt Clay <*****@*****.**>
                    if line.startswith(u'Author: '):
                        line = line.replace(u'Author: ', u'')
                        line = line.replace(u'<', u'')
                        line = line.replace(u'>', u'')
                        lparts = line.split()

                        if u'@' in lparts[-1]:
                            commit[u'email'] = lparts[-1]
                            commit[u'name'] = u' '.join(lparts[:-1])

                        if commit[u'email'] and \
                                u'noreply.github.com' in commit[u'email']:
                            commit[u'login'] = commit[u'email'].split(u'@')[0]

                    # Date:   Sat Jan 28 23:28:53 2017 -0800
                    if line.startswith(u'Date:'):
                        dstr = line.split(u':', 1)[1].strip()
                        dstr = u' '.join(dstr.split(u' ')[:-1])
                        ds = datetime.datetime.strptime(
                            to_text(dstr),
                            u'%a %b %d %H:%M:%S %Y'
                        )
                        commit[u'date'] = ds
                        self.commits[k].append(commit)

                with open(pfile, 'wb') as f:
                    pickle_dump((mtime, self.commits[k]), f)
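
# --- illustrative sketch, not part of the original source ---
# Shows one way the commit cache built by get_module_commits() could be
# consumed afterwards.  `indexer` stands for any object exposing the
# populated `commits` dict; the helper name is hypothetical.
def newest_commit_date(indexer, module_path):
    '''Return the most recent cached commit date for a module, or None.'''
    commits = indexer.commits.get(module_path) or []
    dates = [c[u'date'] for c in commits if c.get(u'date')]
    return max(dates) if dates else None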
Beispiel #28
0
    def get_pullrequest_status(self, force_fetch=False):

        def sort_unique_statuses(statuses):
            '''reduce redundant statuses to the final run for each id'''
            result = []
            groups = []
            thisgroup = []
            for idx, x in enumerate(statuses):
                if not thisgroup:
                    thisgroup.append(x)
                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)
                    continue
                else:
                    if thisgroup[-1][u'target_url'] == x[u'target_url']:
                        thisgroup.append(x)
                    else:
                        groups.append(thisgroup)
                        thisgroup = []
                        thisgroup.append(x)

                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)

            for group in groups:
                group.sort(key=operator.itemgetter(u'updated_at'))
                result.append(group[-1])

            return result

        fetched = False
        jdata = None
        pdata = None
        # pull out the status url from the raw data
        rd = self.pullrequest_raw_data
        surl = rd[u'statuses_url']

        pfile = os.path.join(self.full_cachedir, u'pr_status.pickle')
        pdir = os.path.dirname(pfile)
        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            logging.info(u'pullrequest_status load pfile')
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)

        if pdata:
            # is the data stale?
            if pdata[0] < self.pullrequest.updated_at or force_fetch:
                logging.info(u'fetching pr status: stale, previous from %s' % pdata[0])
                jdata = self.github.get_request(surl)

                if isinstance(jdata, dict):
                    # https://github.com/ansible/ansibullbot/issues/959
                    logging.error(u'Got the following error while fetching PR status: %s', jdata.get(u'message'))
                    logging.error(jdata)
                    return []

                self.log_ci_status(jdata)
                fetched = True
            else:
                jdata = pdata[1]

        # missing?
        if not jdata:
            logging.info(u'fetching pr status: !data')
            jdata = self.github.get_request(surl)
            # FIXME? should we self.log_ci_status(jdata) here too?
            fetched = True

        if fetched or not os.path.isfile(pfile):
            logging.info(u'writing %s' % pfile)
            pdata = (self.pullrequest.updated_at, jdata)
            with open(pfile, 'wb') as f:
                pickle_dump(pdata, f)

        # remove intermediate duplicates
        #jdata = sort_unique_statuses(jdata)

        return jdata
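
# --- illustrative sketch, not part of the original source ---
# A generic version of the caching pattern used by get_pullrequest_status()
# above: a pickle file stores (last_updated, payload), and the payload is
# only refetched when the upstream object reports a newer update time or a
# refresh is forced.  `fetch` is a placeholder for any zero-argument
# callable that returns fresh data.
import os
import pickle


def cached_fetch(cachefile, updated_at, fetch, force=False):
    cached = None
    if os.path.isfile(cachefile):
        with open(cachefile, 'rb') as f:
            cached = pickle.load(f)

    if cached and cached[1] and not force and cached[0] >= updated_at:
        # cached payload is present and at least as new as the upstream object
        return cached[1]

    data = fetch()
    with open(cachefile, 'wb') as f:
        pickle.dump((updated_at, data), f)
    return data
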
def main():
    pprint(sys.argv)
    dest = sys.argv[1]
    print('dest: %s' % dest)

    # get_valid_labels('ansible/ansible')
    # /home/jtanner/.ansibullbot/cache/ansible/ansible/labels.pickle

    with open(os.path.expanduser('~/.ansibullbot/cache/ansible/ansible/labels.pickle'), 'rb') as f:
        labels = pickle_load(f)
    valid_labels = [x.name for x in labels[1]]

    FILEMAP_FILENAME = 'FILEMAP.json'
    COMPONENTMAP_FILENAME = 'COMPONENTMAP.json'
    FI = FileIndexer(
        checkoutdir=os.path.expanduser(
            '~/.ansibullbot/cache/ansible.files.checkout'
        ),
        cmap=COMPONENTMAP_FILENAME,
    )

    module_cache_file = '/tmp/mi-modules.json'
    if not os.path.isfile(module_cache_file):
        module_maintainers = get_maintainers_mapping()
        MI = ModuleIndexer(maintainers=module_maintainers)
        MI.get_ansible_modules()
        with open(module_cache_file, 'wb') as f:
            f.write(json.dumps(MI.modules, sort_keys=True, indent=2))
        modules = MI.modules
    else:
        with open(module_cache_file, 'rb') as f:
            modules = json.loads(f.read())

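    # comma-joined maintainer login sets that collapse into a single
    # $team_<name> macro when they appear together on a file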
    macro_teams = {
        'Qalthos,gundalow,privateip': 'openswitch',
        'Qalthos,ganeshrn,gundalow,privateip,rcarrillocruz,trishnaguha': 'networking',
        'GGabriele,jedelman8,mikewiebe,privateip,rahushen,rcarrillocruz,trishnaguha': 'nxos',
        'emonty,j2sol,juliakreger,rcarrillocruz,shrews,thingee': 'openstack',
        'chrishoffman,manuel-sousa,romanek-adam': 'rabbitmq',
        'alikins,barnabycourt,flossware,vritant': 'rhn',
        'Qalthos,amitsi,gundalow,privateip': 'netvisor',
        'haroldwongms,nitzmahone,tstringer': 'azure',
        'dagwieers,jborean93,jhawkesworth': 'windows',
        'dagwieers,dav1x,jctanner': 'vmware',
        'isharacomix,jrrivers,privateip': 'cumulus',
        'chiradeep,giorgos-nikolopoulos': 'netscaler',
        'ericsysmin,grastogi23,khaltore': 'avi',
        'ghjm,jlaska,matburt,wwitzel3': 'tower',
        'hulquest,lmprice,timuster': 'netapp',
    }

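    # usermap caches whether a GitHub login is valid, namemap fixes known
    # capitalization differences, and exclusions lists logins to drop
    # globally or per-file during maintainer validation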
    usermap = {
        'mpdehaan': False
    }
    namemap = {
        'Shrews': 'shrews'
    }
    exclusions = {
        '*': ['chouseknecht', 'Java1Guy', 'franckcuny', 'mhite', 'bennojoy', 'risaacson', 'whenrik'],
        'network/wakeonlan': ['dagwiers'],
    }

    removed = get_removed_maintainers()

    teams = {}
    data = {}
    data['files'] = {}

    # merge the moduleindexer data
    for k,v in modules.items():
        fp = v.get('filepath')
        if not fp or not fp.startswith('lib/ansible'):
            continue
        data['files'][k] = {}
        if v['_maintainers']:
            data['files'][k]['maintainers'] = list(v['_maintainers'])
        if v['authors']:
            if 'maintainers' not in data['files'][k]:
                data['files'][k]['maintainers'] = []
            data['files'][k]['maintainers'] += v['authors']
            data['files'][k]['maintainers'] = sorted(set(data['files'][k]['maintainers']))

        # validate each maintainer exists
        if 'maintainers' in data['files'][k]:
            maintainers = []
            for x in data['files'][k]['maintainers']:

                if x in exclusions['*']:
                    continue

                if x in namemap:
                    x = namemap[x]
                if x in usermap:
                    if usermap[x]:
                        maintainers.append(x)
                else:
                    if x == 'ansible':
                        usermap['ansible'] = True
                        maintainers.append(x)
                        continue
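                    # an unknown login is considered valid if its GitHub
                    # profile page exists (HTTP 200); the result is memoized
                    # in usermap either way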
                    res = requests.get('https://github.com/%s' % x)
                    if res.status_code == 200:
                        usermap[x] = True
                        maintainers.append(x)
                    else:
                        usermap[x] = False
            data['files'][k]['maintainers'] = sorted(set(maintainers))
            if not data['files'][k]['maintainers']:
                data['files'][k].pop('maintainers', None)

    # merge the removed people
    for k,v in removed.items():
        k = os.path.join('lib/ansible/modules', k)
        v = sorted(set(v))
        if k in data['files']:
            if 'maintainers' in data['files'][k]:
                for vx in v:
                    if vx in data['files'][k]['maintainers']:
                        data['files'][k]['maintainers'].remove(vx)
                        if 'ignored' not in data['files'][k]:
                            data['files'][k]['ignored'] = []
                        data['files'][k]['ignored'].append(vx)
                if not data['files'][k]['maintainers']:
                    data['files'][k].pop('maintainers', None)
                    #import epdb; epdb.st()

    # merge the fileindexer data
    for k in FI.files:
        #if 'contrib/inventory' in k:
        #    import epdb; epdb.st()
        #print(k)
        try:
            klabels = FI.get_component_labels(valid_labels, [k])
            if klabels:
                klabels = [x for x in klabels if not x.startswith('c:')]
                if not klabels:
                    continue
                if k not in data['files']:
                    data['files'][k] = {}
                if 'labels' not in data['files'][k]:
                    data['files'][k]['labels'] = []
                data['files'][k]['labels'] += klabels
        except UnicodeDecodeError:
            continue

        keywords = FI.get_keywords_for_file(k)
        if keywords:
            if k not in data['files']:
                data['files'][k] = {}
            if 'keywords' not in data['files'][k]:
                data['files'][k]['keywords'] = []
            data['files'][k]['keywords'] += keywords
            #import epdb; epdb.st()

    '''
    # calculate all teams
    for k,v in data['files'].items():
        if not v.get('maintainers'):
            continue
        maintainers = sorted(set(v['maintainers']))
        key = ','.join(maintainers)
        if key not in teams:
            teams[key] = []
        teams[key].append(k)

    # rank and show
    steams = sorted(teams, key=len, reverse=True)
    for x in steams[0:15]:
        if x in macro_teams:
            continue
        pprint(teams[x])
        print(x)
        import epdb; epdb.st()
    import epdb; epdb.st()
    '''

    for k,v in data['files'].items():
        if not v.get('maintainers'):
            continue
        maintainers = v.get('maintainers')
        for idx,x in enumerate(maintainers):
            if x == 'ansible':
                maintainers[idx] = '$team_ansible'
        if maintainers == ['$team_ansible']:
            data['files'][k]['maintainers'] = ' '.join(maintainers)
            continue
        if len(maintainers) == 1:
            data['files'][k]['maintainers'] = ' '.join(maintainers)
            continue
        mkey = ','.join(sorted(set(maintainers)))
        if mkey in macro_teams:
            maintainers = ['$team_%s' % macro_teams[mkey]]
            data['files'][k]['maintainers'] = ' '.join(maintainers)
        else:
            # partial matching
            match = None
            subnames = sorted(set(maintainers))
            for sn in subnames:
                filtered = [x for x in subnames if x != sn]
                fkey = ','.join(filtered)
                if fkey in macro_teams:
                    match = fkey
            if match:
                to_clear = match.split(',')
                maintainers = [x for x in maintainers if x not in to_clear]
                data['files'][k]['maintainers'] = ' '.join(maintainers)

    # fix deprecations
    safe_names = [x for x in FI.files if all(c in string.printable for c in x)]
    remove = []
    for k,v in data['files'].items():
        maintainers = v.get('maintainers')
        if maintainers:
            if 'DEPRECATED' in data['files'][k]['maintainers']:
                data['files'][k].pop('maintainers', None)
                data['files'][k]['deprecated'] = True
        bn = os.path.basename(k)
        if bn.startswith('_') and bn != '__init__.py' and '/modules/' in k:
            '''
            data['files'][k]['deprecated'] = True
            if 'maintainers' in data['files'][k]:
                data['files'][k].pop('maintainers', None)
            '''
            remove.append(k)

        # get rid of files no longer in the repo
        if k not in safe_names:
            remove.append(k)

    for x in remove:
        data['files'].pop(x, None)


    # remove any keys where maintainers == authors
    remove = []
    for k,v in data['files'].items():
        if v.keys() != ['maintainers']:
            continue
        if v['maintainers'] != modules[k]['authors']:
            continue
        remove.append(k)
    for x in remove:
        data['files'].pop(x, None)

    #####################################
    # add special notifies
    #####################################
    data['files']['lib/ansible/modules/cloud/amazon/'] = {
        'notify': ['willthames']
    }

    #####################################
    # reduce to namespace maintainers
    #####################################
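    # when every file under a directory carries identical metadata (and
    # there is more than one such file), collapse those per-file entries
    # into a single '<dirname>/' entry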
    groups = {}
    for k,v in data['files'].items():
        dn = os.path.dirname(k)
        if dn not in groups:
            groups[dn] = {
                'matches': [],
                'values': []
            }
        groups[dn]['matches'].append(k)
        if v not in groups[dn]['values']:
            groups[dn]['values'].append(v)
    for k,v in groups.items():
        if not len(v['values']) == 1:
            continue
        if len(v['matches']) == 1:
            continue
        #print(k)
        #pprint(v)

        newk = k + '/'
        data['files'][newk] = v['values'][0]
        for pf in v['matches']:
            data['files'].pop(pf, None)

        if newk in removed:
            import epdb; epdb.st()


    #####################################
    # make a sorted dict
    #####################################

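    # rewrite the 'lib/ansible/modules' prefix to the $modules macro so
    # module entries sort and appear under the macro form in the output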
    files = data['files']
    data['files'] = OrderedDict()
    fkeys = sorted(files.keys())
    fkeys = [x.replace('lib/ansible/modules', '$modules') for x in fkeys]
    fkeys = sorted(set(fkeys))
    for fkey in fkeys:
        if fkey.startswith('$modules'):
            mkey = fkey.replace('$modules', 'lib/ansible/modules')
            data['files'][fkey] = files[mkey]
        else:
            data['files'][fkey] = files[fkey]

    data['macros'] = OrderedDict()
    data['macros']['modules'] = 'lib/ansible/modules'
    # invert macro_teams: team name -> comma-joined maintainer logins
    macro_dict = dict((v, k) for k, v in macro_teams.items())

    data['macros']['team_ansible'] = []
    keys = macro_dict.keys()
    for k in sorted(keys):
        team = macro_dict[k]
        team = team.split(',')
        if len(team) < 10:
            team = " ".join(team)
        data['macros']['team_%s' % k] = team

    # if maintainers is the only subkey, make the primary value a string
    for k,v in data['files'].items():
        keys = v.keys()
        if keys == ['maintainers']:
            if isinstance(v['maintainers'], list):
                data['files'][k] = " ".join(v['maintainers'])
            else:
                data['files'][k] = v['maintainers']
        for xk in ['ignored', 'notified', 'maintainers']:
            if xk in data['files'][k]:
                if not isinstance(data['files'][k][xk], (str, unicode)):
                    data['files'][k][xk] = " ".join(data['files'][k][xk])


    # write it once with ryaml to make it ordered
    ryaml = rYAML()
    (fo, fn) = tempfile.mkstemp()
    os.close(fo)
    with open(fn, 'wb') as f:
        ryaml.dump(data, f)

    # read it back in
    with open(fn, 'rb') as f:
        ylines = f.readlines()

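    # post-process the ruamel output: drop the !!omap tags and empty-dict
    # markers, convert leading list dashes into plain indentation, and fold
    # wrapped continuation lines back onto the previous line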
    phase = None
    for idx,x in enumerate(ylines):
        x = x.rstrip()
        x = x.replace('!!omap', '')
        if x.endswith(' {}'):
            x = x.replace(' {}', '')
        if x.startswith('-'):
            x = x.replace('-', ' ', 1)
        ylines[idx] = x


        if x.startswith(' ') and ':' not in x and '-' not in x:
            ylines[idx-1] += ' ' + x.strip()
            ylines[idx] = ''

    ylines = [x for x in ylines if x.strip()]
    ylines = [HEADER] + ylines

    with open(dest, 'wb') as f:
        f.write('\n'.join(ylines))
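
# Hypothetical invocation of the script above (the output path is taken
# from sys.argv[1]); the script name is a placeholder:
#
#     python generate_metadata.py /tmp/metadata.yml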