Example #1
    def relocate_pickle_files(self):
        '''Move files to the correct location to fix bad pathing'''
        srcdir = os.path.join(
            self.cachedir,
            u'issues',
            to_text(self.instance.number)
        )
        destdir = os.path.join(
            self.cachedir,
            to_text(self.instance.number)
        )

        if not os.path.isdir(srcdir):
            return True

        if not os.path.isdir(destdir):
            os.makedirs(destdir)

        # move the files
        pfiles = os.listdir(srcdir)
        for pf in pfiles:
            src = os.path.join(srcdir, pf)
            dest = os.path.join(destdir, pf)
            shutil.move(src, dest)

        # get rid of the bad dir
        shutil.rmtree(srcdir)
Example #2
 def create_checkout(self):
     """checkout ansible"""
     # cleanup
     if os.path.isdir(self.checkoutdir):
         shutil.rmtree(self.checkoutdir)
     cmd = "git clone %s %s" \
         % (self.repo, self.checkoutdir)
     (rc, so, se) = run_command(cmd)
     print(to_text(so) + to_text(se))
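The git commands throughout these examples go through a run_command helper that returns (rc, stdout, stderr). A minimal sketch of such a helper, assuming plain subprocess semantics with shell execution (this stand-in is an assumption, not the project's actual implementation):

import subprocess

def run_command(cmd):
    # run a shell command and return (returncode, stdout, stderr) as bytes
    p = subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    so, se = p.communicate()
    return (p.returncode, so, se)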
Example #3
    def pullrequest_filepath_exists(self, filepath):
        ''' Check if a file exists on the submitters branch '''

        # https://github.com/ansible/ansibullbot/issues/406

        # https://developer.github.com/v3/repos/contents/
        #   GET /repos/:owner/:repo/readme
        # "contents_url":
        # "https://api.github.com/repos/ganeshrn/ansible/contents/{+path}",

        # self.pullrequest.head
        #   - ref --> branch name
        #   - repo.full_name

        sha = self.pullrequest.head.sha
        pdata = None
        resp = None
        cachefile = os.path.join(
            self.cachedir,
            u'issues',
            to_text(self.number),
            u'shippable_yml.pickle'
        )

        try:
            if os.path.isfile(cachefile):
                with open(cachefile, 'rb') as f:
                    pdata = pickle_load(f)
        except Exception as e:
            logging.error(u'failed to unpickle %s %s' % (cachefile, to_text(e)))

        if not pdata or pdata[0] != sha:

            if self.pullrequest.head.repo:
                url = self.pullrequest.head.repo.url + u'/contents/' + filepath
                resp = self.pullrequest._requester.requestJson(
                    u"GET",
                    url,
                    input={u'ref': self.pullrequest.head.ref}
                )
            else:
                # https://github.com/ansible/ansible/pull/19891
                # Sometimes the repo repo/branch has disappeared
                resp = [None]

            pdata = [sha, resp]
            with open(cachefile, 'wb') as f:
                pickle_dump(pdata, f)

        else:
            resp = pdata[1]

        result = False
        if resp[0]:
            result = True
        return result
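The sha-keyed pickle cache used above generalizes to a small pattern: keep a [sha, response] pair on disk and only re-fetch when the head sha changes. A minimal sketch, where cached_fetch and fetch_fn are hypothetical names rather than part of the original code:

import os
import pickle

def cached_fetch(cachefile, sha, fetch_fn):
    # reuse the pickled [sha, response] pair while the sha is unchanged
    pdata = None
    if os.path.isfile(cachefile):
        try:
            with open(cachefile, 'rb') as f:
                pdata = pickle.load(f)
        except Exception:
            pdata = None
    if not pdata or pdata[0] != sha:
        # stale or missing cache: fetch again and rewrite
        pdata = [sha, fetch_fn()]
        with open(cachefile, 'wb') as f:
            pickle.dump(pdata, f)
    return pdata[1]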
Example #4
    def _find_match(self, pattern, exact=False):

        logging.debug(u'exact:{} matching on {}'.format(exact, pattern))

        matches = []

        if isinstance(pattern, six.text_type):
            pattern = to_text(to_bytes(pattern, 'ascii', 'ignore'), 'ascii')

        for k, v in six.iteritems(self.modules):
            if v[u'name'] == pattern:
                logging.debug(u'match {} on name: {}'.format(k, v[u'name']))
                matches = [v]
                break

        if not matches:
            # search by key ... aka the filepath
            for k, v in six.iteritems(self.modules):
                if k == pattern:
                    logging.debug(u'match {} on key: {}'.format(k, k))
                    matches = [v]
                    break

        if not matches and not exact:
            # search by properties
            for k, v in six.iteritems(self.modules):
                for subkey in v.keys():
                    if v[subkey] == pattern:
                        logging.debug(u'match {} on subkey: {}'.format(k, subkey))
                        matches.append(v)

        if not matches and not exact:
            # Levenshtein distance should workaround most typos
            distance_map = {}
            for k, v in six.iteritems(self.modules):
                mname = v.get(u'name')
                if not mname:
                    continue
                if isinstance(mname, six.text_type):
                    mname = to_text(to_bytes(mname, 'ascii', 'ignore'), 'ascii')
                try:
                    res = Levenshtein.distance(pattern, mname)
                except TypeError as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb; epdb.st()
                    continue
                distance_map[mname] = [res, k]
            res = sorted(distance_map.items(), key=lambda x: x[1][0], reverse=True)
            if res and len(pattern) > 3 > res[-1][1][0]:
                logging.debug(u'levenshtein distance match: ({}) {} {}'.format(res[-1][1][1], res[-1][0], pattern))
                matches = [self.modules[res[-1][1][1]]]

        return matches
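The typo fallback sorts on edit distances from python-Levenshtein; a couple of concrete scores it works with:

import Levenshtein

assert Levenshtein.distance(u'copy', u'copy') == 0    # exact name
assert Levenshtein.distance(u'copy', u'coppy') == 1   # one-character typo
# the matcher above only accepts a candidate when the distance is below 3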
Example #5
    def test_component_matching(self):

        print('')

        AT = AnsibleTriage(args={})
        AT.file_indexer.get_files()

        jfile = 'tests/fixtures/issue_template_meta.json'
        with open(jfile, 'rb') as f:
            jdata = json.load(f)

        keys = sorted([int(x) for x in jdata.keys()])

        for key in keys:

            k = to_text(key)
            v = jdata[k]

            if '/pull/' in v['html_url']:
                continue

            if not v.get('labels'):
                continue

            if 'module' in v['labels']:
                continue

            clabels = [x for x in v['labels'] if x.startswith('c:')]
            #if not clabels:
            #    continue

            print(v['html_url'])

            # extract fields from the body
            td = extract_template_data(
                v['body'],
                issue_number=key,
                issue_class=None
            )

            components = AT.file_indexer.find_component_match(
                v['title'],
                v['body'],
                td
            )
            if components and clabels:
                comp_labels = AT.file_indexer.get_component_labels(
                    AT.valid_labels,
                    components
                )
                print('\t' + to_text(comp_labels))
Example #6
 def get_files(self):
     '''Cache a list of filenames in the checkout'''
     cmd = u'cd {}; git ls-files'.format(self.gitrepo.checkoutdir)
     (rc, so, se) = run_command(cmd)
     files = to_text(so).split(u'\n')
     files = [x.strip() for x in files if x.strip()]
     self.files = files
Example #7
    def get_single_issue_summary(
        self,
        repo_url,
        number,
        cachefile=None,
        force=False
    ):

        '''Scrape the summary for a specific issue'''

        # get cached
        issues = self.load_summaries(repo_url)

        if number in issues and not force:
            return issues[number]
        else:
            if repo_url.startswith(u'http'):
                url = repo_url
            else:
                url = self.baseurl + u'/' + repo_url
            url += u'/issues/'
            url += to_text(number)

            rr = self._request_url(url)
            soup = BeautifulSoup(rr.text, u'html.parser')
            if soup.text.lower().strip() != u'not found':
                summary = self.parse_issue_page_to_summary(soup, url=rr.url)
                if summary:
                    issues[number] = summary

        if number in issues:
            return issues[number]
        else:
            return {}
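The scrape path only needs BeautifulSoup for parsing and for the flattened text used in the 'not found' check; in isolation:

from bs4 import BeautifulSoup

html = u'<html><body><h1>Not Found</h1></body></html>'
soup = BeautifulSoup(html, u'html.parser')
print(soup.text.lower().strip())  # not found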
Example #8
    def get_issue_summaries(self, repo_url, baseurl=None, cachefile=None):
        """Return a dict of all issue summaries with numbers as keys

        Serves as a compatibility method for the webscraper

        Args:
            repo_url  (str): username/repository
            baseurl   (str): not used
            cachefile (str): not used
        """
        owner = repo_url.split(u'/', 1)[0]
        repo = repo_url.split(u'/', 1)[1]
        summaries = self.get_all_summaries(owner, repo)

        issues = {}
        for x in summaries:
            issues[to_text(x[u'number'])] = x

        # keep the summaries for out of band analysis
        repodata = {
            u'user': repo_url.split(u'/', 1)[0],
            u'repo': repo_url.split(u'/', 1)[1],
        }
        post_to_receiver(u'summaries', repodata, issues)

        return issues
Example #9
    def get_summary(self, repo_url, otype, number):
        """Collect all the summary data for issues or pull requests ids

        Args:
            repo_url  (str): repository URL
            otype     (str): issue or pullRequest
            number    (str): Identifies the pull-request or issue, for example: 12345
        """
        owner = repo_url.split(u'/', 1)[0]
        repo = repo_url.split(u'/', 1)[1]

        template = self.environment.from_string(QUERY_TEMPLATE_SINGLE_NODE)

        query = template.render(OWNER=owner, REPO=repo, OBJECT_TYPE=otype, OBJECT_PARAMS='number: %s' % number, FIELDS=QUERY_FIELDS)

        payload = {
            u'query': to_bytes(query, 'ascii', 'ignore').strip(),
            u'variables': u'{}',
            u'operationName': None
        }
        if six.PY3:
            payload[u'query'] = to_text(payload[u'query'], 'ascii')

        rr = requests.post(self.baseurl, headers=self.headers, data=json.dumps(payload))
        data = rr.json()

        node = data[u'data'][u'repository'][otype]
        if node is None:
            return

        self.update_node(node, otype, owner, repo)

        return node
Example #10
def post_to_receiver(path, params, data):

    if not data:
        return

    if not C.DEFAULT_RECEIVER_HOST or u'none' in C.DEFAULT_RECEIVER_HOST.lower():
        return

    rr = None
    if C.DEFAULT_RECEIVER_HOST and data:
        receiverurl = u'http://'
        receiverurl += C.DEFAULT_RECEIVER_HOST
        receiverurl += u':'
        receiverurl += to_text(C.DEFAULT_RECEIVER_PORT)
        receiverurl += u'/'
        receiverurl += path
        logging.info(u'RECEIVER: POST to %s' % receiverurl)
        try:
            rr = requests.post(receiverurl, params=params, json=data)
        except Exception as e:
            logging.warning(e)

    try:
        if rr is not None:
            for k, v in rr.json().items():
                logging.info(u'RECEIVER: %s %s' % (v, k))
    except ValueError as e:
        logging.debug(u'RECEIVER: status_code = %s' % rr.status_code)
        logging.warning(e)
Example #11
 def set_missing(self, number):
     mfile = os.path.join(self.cachedir, u'issues', to_text(number), u'missing')
     mdir = os.path.dirname(mfile)
     if not os.path.isdir(mdir):
         os.makedirs(mdir)
     with open(mfile, 'wb') as f:
         f.write(b'\n')
Example #12
    def merge_commits(self, commits):
        for xc in commits:

            '''
            # 'Thu, 12 Jan 2017 15:06:46 GMT'
            tfmt = '%a, %d %b %Y %H:%M:%S %Z'
            ts = xc.last_modified
            dts = datetime.datetime.strptime(ts, tfmt)
            '''
            # committer.date: "2016-12-19T08:05:45Z"
            dts = xc.commit.committer.date
            adts = pytz.utc.localize(dts)

            event = {}
            event[u'id'] = xc.sha
            if hasattr(xc.committer, u'login'):
                event[u'actor'] = xc.committer.login
            else:
                event[u'actor'] = to_text(xc.committer)
            #event[u'created_at'] = dts
            event[u'created_at'] = adts
            event[u'event'] = u'committed'
            event[u'message'] = xc.commit.message
            self.history.append(event)

        self.fix_history_tz()
        self.history = sorted(self.history, key=itemgetter(u'created_at'))
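merge_commits attaches a UTC timezone to the naive datetimes that PyGithub hands back; the localization step on its own:

import datetime
import pytz

naive = datetime.datetime(2016, 12, 19, 8, 5, 45)  # committer.date is tz-naive
aware = pytz.utc.localize(naive)
print(aware.isoformat())  # 2016-12-19T08:05:45+00:00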
Example #13
 def get_pullrequest_runs(self, number):
     '''All runs for the given PR number'''
     nruns = []
     for x in self.runs:
         if x[u'commitUrl'].endswith(u'/' + to_text(number)):
             nruns.append(x)
     return nruns
Example #14
 def clean_issue_cache(self, number):
     # https://github.com/ansible/ansibullbot/issues/610
     cdir = os.path.join(
         self.cachedir,
         u'issues',
         to_text(number)
     )
     shutil.rmtree(cdir)
Example #15
    def get_files(self):

        cmd = u'find %s' % self.gitrepo.checkoutdir
        (rc, so, se) = run_command(cmd)
        files = to_text(so).split(u'\n')
        files = [x.strip() for x in files if x.strip()]
        files = [x.replace(self.gitrepo.checkoutdir + u'/', u'') for x in files]
        files = [x for x in files if not x.startswith(u'.git')]
        self.files = files
Example #16
 def get_files(self, force=False):
     '''Cache a list of filenames in the checkout'''
     if not self._files or force:
         cmd = u'cd {}; git ls-files'.format(self.checkoutdir)
         logging.debug(cmd)
         (rc, so, se) = run_command(cmd)
         files = to_text(so).split(u'\n')
         files = [x.strip() for x in files if x.strip()]
         self._files = files
Example #17
    def dump_action_dict(self, issue, actions):
        '''Serialize the action dict to disk for quick(er) debugging'''
        fn = os.path.join(u'/tmp', u'actions', issue.repo_full_name, to_text(issue.number) + u'.json')
        dn = os.path.dirname(fn)
        if not os.path.isdir(dn):
            os.makedirs(dn)

        logging.info('dumping {}'.format(fn))
        with open(fn, 'w') as f:
            f.write(json.dumps(actions, indent=2, sort_keys=True))
Example #18
    def last_commit_for_file(self, filepath):
        if filepath in self.commits:
            return self.commits[filepath][0][u'hash']

        # git log --pretty=format:'%H' -1
        # lib/ansible/modules/cloud/amazon/ec2_metric_alarm.py
        cmd = u'cd %s; git log --pretty=format:\'%%H\' -1 %s' % \
            (self.gitrepo.checkoutdir, filepath)
        (rc, so, se) = run_command(cmd)
        return to_text(so).strip()
Example #19
    def get_files_by_commit(self, commit):

        if commit not in self.files_by_commit:
            cmd = u'cd {}; git show --pretty="" --name-only {}'.format(self.checkoutdir, commit)
            (rc, so, se) = run_command(cmd)
            filenames = [x.strip() for x in to_text(so).split(u'\n') if x.strip()]
            self.files_by_commit[commit] = filenames[:]
        else:
            filenames = self.files_by_commit[commit]

        return filenames
Example #20
    def get_module_metadata(self, module_file):
        meta = {}

        if not os.path.isfile(module_file):
            return meta

        rawmeta = u''
        inphase = False
        with io.open(module_file, 'r', encoding='utf-8') as f:
            for line in f:
                if line.startswith(u'ANSIBLE_METADATA'):
                    inphase = True
                if line.startswith(u'DOCUMENTATION'):
                    break
                if inphase:
                    rawmeta += line
        rawmeta = rawmeta.replace(u'ANSIBLE_METADATA =', u'', 1)
        rawmeta = rawmeta.strip()
        try:
            meta = ast.literal_eval(rawmeta)
            tmp_meta = {}
            for k, v in meta.items():
                if isinstance(k, six.binary_type):
                    k = to_text(k)
                if isinstance(v, six.binary_type):
                    v = to_text(v)
                if isinstance(v, list):
                    tmp_list = []
                    for i in v:
                        if isinstance(i, six.binary_type):
                            i = to_text(i)
                        tmp_list.append(i)
                    v = tmp_list
                    del tmp_list
                tmp_meta[k] = v
            meta = tmp_meta
            del tmp_meta
        except SyntaxError:
            pass

        return meta
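A quick illustration of the extraction get_module_metadata performs, using a made-up module body; the dict literal after 'ANSIBLE_METADATA =' is what ast.literal_eval parses:

import ast

raw = u"""
ANSIBLE_METADATA = {'metadata_version': '1.1',
                    'status': ['preview'],
                    'supported_by': 'community'}
DOCUMENTATION = '...'
"""
block = raw.split(u'ANSIBLE_METADATA =', 1)[1].split(u'DOCUMENTATION')[0].strip()
meta = ast.literal_eval(block)
print(meta[u'supported_by'])  # community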
Example #21
    def version_by_date(self, dateobj, devel=False):

        if not self.DATEVERSIONS:
            self.DATEVERSIONS = []
            cmd = u'cd %s;' % self.checkoutdir
            cmd += u'git log --date=short --pretty=format:"%ad;%H"'
            (rc, so, se) = run_command(cmd)
            lines = (x.strip() for x in to_text(so).split(u'\n'))
            lines = filter(bool, lines)
            for x in lines:
                parts = x.split(u';')
                self.DATEVERSIONS.append(parts)

        last_commit_date = self.DATEVERSIONS[0][0]
        last_commit_date = datetime.datetime.strptime(
            last_commit_date,
            u'%Y-%m-%d'
        )

        # use last commit version if older than incoming date
        if dateobj >= last_commit_date:
            acommit = self.DATEVERSIONS[0][1]
        else:
            acommit = None
            datestr = to_text(dateobj).split()[0]
            for dv in reversed(self.DATEVERSIONS):
                if dv[0] == datestr:
                    acommit = dv[1]
                    break
            if not acommit:
                datestr = u'-'.join(datestr.split(u'-')[0:2])
                for dv in self.DATEVERSIONS:
                    dvs = u'-'.join(dv[0].split(u'-')[0:2])
                    if dvs == datestr:
                        acommit = dv[1]
                        break

        aversion = None
        if acommit:
            aversion = self.ansible_version_by_commit(acommit)

        return aversion
Example #22
    def update_checkout(self):
        """rebase + pull + update the checkout"""

        changed = False

        cmd = "cd %s ; git pull --rebase" % self.checkoutdir
        (rc, so, se) = run_command(cmd)
        so = to_text(so)
        print(so + to_text(se))

        # If rebase failed, recreate the checkout
        if rc != 0:
            self.create_checkout()
            return True
        else:
            if u'current branch devel is up to date.' not in so.lower():
                changed = True

        self.commits_by_email = None

        return changed
Example #23
 def save_pullrequest(self, issue):
     cfile = os.path.join(
         self.cachedir,
         u'issues',
         to_text(issue.number),
         u'pullrequest.pickle'
     )
     cdir = os.path.dirname(cfile)
     if not os.path.isdir(cdir):
         os.makedirs(cdir)
     with open(cfile, 'wb') as f:
         pickle_dump(issue, f)
Example #24
 def clean_list_items(inlist):
     if isinstance(inlist, list):
         inlist = to_text(inlist)
     if u'&' in inlist:
         if C.DEFAULT_BREAKPOINTS:
             logging.error(u'breakpoint!')
             import epdb; epdb.st()
     inlist = inlist.replace(u"[", u'')
     inlist = inlist.replace(u"]", u'')
     inlist = inlist.replace(u"'", u'')
     inlist = inlist.replace(u",", u'')
     inlist = inlist.split()
     return inlist
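For reference, clean_list_items just turns a stringified list back into tokens, so the round trip looks like:

print(clean_list_items(u"['lib/ansible/modules', 'docs']"))
# ['lib/ansible/modules', 'docs']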
Example #25
 def save_issue(self, issue):
     cfile = os.path.join(
         self.cachedir,
         u'issues',
         to_text(issue.number),
         u'issue.pickle'
     )
     cdir = os.path.dirname(cfile)
     if not os.path.isdir(cdir):
         os.makedirs(cdir)
     logging.debug(u'dump %s' % cfile)
     with open(cfile, 'wb') as f:
         pickle_dump(issue, f)
Example #26
 def load_issue(self, number):
     pfile = os.path.join(
         self.cachedir,
         u'issues',
         to_text(number),
         u'issue.pickle'
     )
     if os.path.isfile(pfile):
         with open(pfile, 'rb') as f:
             issue = pickle_load(f)
         return issue
     else:
         return False
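save_issue and load_issue are the two halves of the same per-issue pickle cache; note that load_issue signals a miss with False rather than None. A hypothetical round trip, where cache and issue stand in for real objects:

cache.save_issue(issue)
loaded = cache.load_issue(issue.number)
if loaded is False:
    # nothing cached yet for this issue number
    loaded = None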
Example #27
    def update_checkout(self):
        """rebase + pull + update the checkout"""

        changed = False

        # get a specific commit or do a rebase
        if self.commit:
            cmd = "cd %s; git log -1  | head -n1 | awk '{print $2}'" % self.checkoutdir
            (rc, so, se) = run_command(cmd)
            so = to_text(so).strip()

            if so != self.commit:
                cmd = "cd %s; git checkout %s" % (self.checkoutdir, self.commit)
                (rc, so, se) = run_command(cmd)
                changed = True

            if rc != 0:
                self.create_checkout()
                changed = True

        else:
            changed = False

            cmd = "cd %s ; git pull --rebase" % self.checkoutdir
            (rc, so, se) = run_command(cmd)
            so = to_text(so)
            print(so + to_text(se))

            # If rebase failed, recreate the checkout
            if rc != 0:
                self.create_checkout()
                return True
            else:
                if u'current branch devel is up to date.' not in so.lower():
                    changed = True

        self.commits_by_email = None

        return changed
Example #28
    def get_usernames_from_filename_blame(self, owner, repo, branch, filepath):

        template = self.environment.from_string(QUERY_TEMPLATE_BLAME)
        committers = defaultdict(set)
        emailmap = {}

        query = template.render(OWNER=owner, REPO=repo, BRANCH=branch, PATH=filepath)

        payload = {
            u'query': to_text(
                to_bytes(query, 'ascii', 'ignore'),
                'ascii',
            ).strip(),
            u'variables': u'{}',
            u'operationName': None
        }
        response = self.requests(payload)
        data = response.json()

        nodes = data[u'data'][u'repository'][u'ref'][u'target'][u'blame'][u'ranges']
        """
        [
            'commit':
            {
                'oid': 'a3132e5dd6acc526ce575f6db134169c7090f72d',
                'author':
                {
                    'email': '*****@*****.**',
                    'user': {'login': '******'}
                }
            }
        ]
        """
        for node in nodes:
            node = node[u'commit']
            if not node[u'author'][u'user']:
                continue
            github_id = node[u'author'][u'user'][u'login']
            committers[github_id].add(node[u'oid'])
            # emails come from 'git log --follow', but not every github id can be fetched:
            # - GraphQL/git 'blame' doesn't list all commits
            # - GraphQL 'history' doesn't either, since 'history' works like 'git log' but without '--follow'
            email = node[u'author'].get(u'email')
            if email and email not in emailmap:
                emailmap[email] = github_id

        for github_id, commits in committers.items():
            committers[github_id] = list(commits)
        return committers, emailmap
Example #29
    def save_issue(self):
        pfile = os.path.join(
            self.cachedir,
            u'issues',
            to_text(self.instance.number),
            u'issue.pickle'
        )
        pdir = os.path.dirname(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        logging.debug(u'dump %s' % pfile)
        with open(pfile, 'wb') as f:
            pickle_dump(self.instance, f)
Example #30
    def test_module_matching(self):

        print('')

        AT = AnsibleTriage(args={})

        jfile = 'tests/fixtures/issue_template_meta.json'
        with open(jfile, 'rb') as f:
            jdata = json.load(f)

        keys = sorted([int(x) for x in jdata.keys()])

        for key in keys:

            k = to_text(key)
            v = jdata[k]

            if '/pull/' in v['html_url']:
                continue

            print(v['html_url'])

            # extract fields from the body
            td = extract_template_data(
                v['body'],
                issue_number=key,
                issue_class=None
            )

            # schema tests
            assert isinstance(td, dict)
            assert 'component_raw' in td
            assert 'component name' in td

            # confirm the raw converted to the component name
            assert td['component_raw'] == v['component_raw']
            assert td['component name'] == v['component_name']

            # confirm module matching works.
            mm = AT.find_module_match(v['title'], td)
            if v['module_match']:
                if mm is None:
                    import epdb; epdb.st()
                elif mm['filepath'] != v['module_match'] and \
                        mm['name'] != v['module_match']:
                    import epdb; epdb.st()
            elif mm is not None:
                import epdb; epdb.st()
Example #31
    def strip_ansible_version(self, rawtext, logprefix=''):

        # any
        # all
        # all?
        # all ?
        # all recent releases
        # a55c6625d4771c44017fce1d487b38749b12b381 (latest dev)
        # ansible devel
        # devel
        # latest
        # latest devel branch
        # v2.0.0-0.9.rc4
        # N/A
        # NA
        # current head
        # master
        # not applicable
        # >2.0
        # - 1.8.2
        # - devel head f9c203feb68e224cd3d445568b39293f8a3d32ad
        # ansible@devel
        # 1.x
        # 2.x

        devel = ['devel', 'master', 'head', 'latest', 'all', 'all?', 'all ?', 'any',
                 'n/a', 'na', 'not applicable', 'latest devel',
                 'latest devel branch', 'ansible devel', '', 'future',
                 'git version', 'ansible@devel', 'all recent releases']

        if not self.VALIDVERSIONS:
            self._get_versions()

        if rawtext is None:
            return 'devel'

        aversion = False

        rawtext = rawtext.replace('`', '')
        rawtext = rawtext.strip()
        rawtext = rawtext.lower()
        rawlines = rawtext.split('\n')
        rawlines = [x.strip() for x in rawlines]

        # exit early for "devel" variations ...
        if rawtext in devel:
            return 'devel'

        # handle 1.x/2.x globs
        xver = re.compile(r'^-?[1-9]\.x')
        if len(rawlines) == 1:
            if xver.match(rawlines[0]):
                major_ver = rawlines[0].split('.')[0]

                # Get the highest minor version for this major
                cversions = reversed(sorted(self.VALIDVERSIONS.keys()))
                for cver in cversions:
                    if cver[0] == major_ver:
                        aversion = cver
                        break
                if aversion:
                    return aversion

        xver = re.compile(r'^-?[1-9]\.[1-9]\.x')
        if len(rawlines) == 1:
            if xver.match(rawlines[0]):
                major_ver = rawlines[0].split('.')[0]
                minor_ver = rawlines[0].split('.')[1]

                # Get the highest minor version for this major
                cversions = reversed(sorted(self.VALIDVERSIONS.keys()))
                for cver in cversions:
                    if cver[0:3] == (major_ver + '.' + minor_ver):
                        aversion = cver
                        break
                if aversion:
                    return aversion

        # check for copy/paste from --version output
        for idx, x in enumerate(rawlines):
            if len(rawlines) < (idx+2):
                continue
            if x.startswith('ansible') and \
                (rawlines[idx+1].startswith('config file') or
                 rawlines[idx+1].startswith('configured module search path')):
                parts = x.replace(')', '').split()
                aversion = parts[1]

                # is this a checkout with a hash? ...
                if len(parts) > 3:
                    pass
                elif len(parts) > 2:
                    # ['ansible', '2.2.0.0', 'rc1']
                    pass
                return aversion

        # try to find a vstring ...
        pidx = rawtext.find('.')
        if pidx > -1:
            fver = ''
            # get chars to the end of the vstring ...
            for char in rawtext[pidx:]:
                if char == ' ' or char == '\n' or char == '\r' \
                        or (not char.isalnum() and char != '.'):
                    break
                else:
                    fver += char
            head = rawtext[:pidx]
            head = head[::-1]
            # get chars to the beginning of the vstring ...
            for char in head:
                if char == ' ' or char == '\n' or char == '\r' \
                        or (not char.isalnum() and char != '.'):
                    break
                else:
                    fver = char + fver
            if fver[0] == 'v':
                fver = fver[1:]
            if fver:
                sver = None
                lver = None

                try:
                    sver = StrictVersion(fver)
                except Exception:
                    pass

                try:
                    lver = LooseVersion(fver)
                except Exception:
                    pass

                if sver:
                    return fver
                elif lver and fver[0].isdigit():
                    return fver

        lines = rawtext.split('\n')
        lines = [x.strip() for x in lines if x.strip()]
        lines = [x for x in lines if not x.startswith('config')]
        lines = [x for x in lines if not x.startswith('<')]
        lines = [x for x in lines if not x.startswith('-')]
        lines = [x for x in lines if not x.startswith('lib')]
        for idx, x in enumerate(lines):
            if "'" in x:
                x = x.replace("'", '').strip()
            if '"' in x:
                x = x.replace('"', '').strip()
            if '`' in x:
                x = x.replace('`', '').strip()
            if ',' in x:
                x = x.replace(',', '').strip()
            if '*' in x:
                x = x.replace('*', '').strip()
            if ')' in x:
                x = x.replace(')', '').strip()
            lines[idx] = x
        lines = [x.strip() for x in lines if x.strip()]
        lines = [x for x in lines if x.startswith('ansible') or x[0].isdigit() or x[0] == 'v']

        # https://github.com/ansible/ansible-modules-extras/issues/809
        #   false positives from this issue ...
        lines = [x for x in lines if 'versions: []' not in x]

        # try to narrow down to a single line
        if len(lines) > 1:
            candidate = None
            for x in lines:
                pidx = x.find('.')
                if pidx == -1:
                    continue
                if (len(x) - 1) < (pidx+1):
                    continue
                if not x[pidx+1].isdigit():
                    continue
                if (x.startswith('ansible') or x[0].isdigit()) and '.' in x:
                    candidate = x
                    break
            if candidate:
                lines = [candidate]

        if len(lines) > 0:
            try:
                StrictVersion(lines[0])
                aversion = lines[0]
            except Exception as e:

                words = lines[0].split()
                words = [x.strip() for x in words if x.strip()]
                words = [x for x in words if x != 'stable']
                words = [x for x in words if x != 'ansible']
                words = [x for x in words if x != 'ansible-doc']
                words = [x for x in words if x != 'ansible-playbook']
                if not words:
                    print(logprefix + "NO VERSIONABLE WORDS!!")
                    pass
                else:

                    if words[0].startswith('ansible-'):
                        words[0] = words[0].replace('ansible-', '')

                    if words[0][0] == 'v':
                        words[0] = words[0][1:]
                    characters = words[0].split('.')
                    digits = [x.isdigit() for x in characters]
                    digits = sorted(set(digits))
                    if digits == [True]:
                        try:
                            aversion = words[0]
                        except Exception as e:
                            logging.error(e)
                            raise
                    elif characters[0].isdigit():
                        aversion = words[0]
                    else:
                        print(logprefix + "INVALID VER STRING !!!")
                        print(logprefix + 'Exception: ' + to_text(e))
                        for line in lines:
                            print(logprefix + line)

        return aversion
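A few input/output pairs for the parser above, traced from its branches; vw is a hypothetical instance whose version cache (VALIDVERSIONS) is already populated:

vw.strip_ansible_version(None)               # 'devel'
vw.strip_ansible_version('latest devel')     # 'devel'
vw.strip_ansible_version('ansible 2.4.3.0')  # '2.4.3.0'
vw.strip_ansible_version('v2.0.0')           # '2.0.0'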
Example #32
    def get_pullrequest_status(self, force_fetch=False):
        def sort_unique_statuses(statuses):
            '''reduce redundant statuses to the final run for each id'''
            result = []
            groups = []
            thisgroup = []
            for idx, x in enumerate(statuses):
                if not thisgroup:
                    thisgroup.append(x)
                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)
                    continue
                else:
                    if thisgroup[-1][u'target_url'] == x[u'target_url']:
                        thisgroup.append(x)
                    else:
                        groups.append(thisgroup)
                        thisgroup = []
                        thisgroup.append(x)

                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)

            for group in groups:
                group.sort(key=operator.itemgetter(u'updated_at'))
                result.append(group[-1])

            return result

        fetched = False
        jdata = None
        pdata = None
        # pull out the status url from the raw data
        rd = self.pullrequest_raw_data
        surl = rd[u'statuses_url']

        pfile = os.path.join(self.cachedir, u'issues', to_text(self.number),
                             u'pr_status.pickle')
        pdir = os.path.dirname(pfile)
        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            logging.info(u'pullrequest_status load pfile')
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)

        if pdata:
            # is the data stale?
            if pdata[0] < self.pullrequest.updated_at or force_fetch:
                logging.info(u'fetching pr status: stale, previous from %s' %
                             pdata[0])
                jdata = self._fetch_api_url(surl)
                self.log_ci_status(jdata)
                fetched = True
            else:
                jdata = pdata[1]

        # missing?
        if not jdata:
            logging.info(u'fetching pr status: !data')
            jdata = self._fetch_api_url(surl)
            fetched = True

        if fetched or not os.path.isfile(pfile):
            logging.info(u'writing %s' % pfile)
            pdata = (self.pullrequest.updated_at, jdata)
            with open(pfile, 'wb') as f:
                pickle_dump(pdata, f)

        # remove intermediate duplicates
        #jdata = sort_unique_statuses(jdata)

        return jdata
Example #33
 def get_version_major_minor(self, vstring):
     '''Return an X.Y version'''
     lver = LooseVersion(vstring)
     rval = '.'.join([to_text(x) for x in lver.version[0:2]])
     return rval
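The trim above relies on LooseVersion splitting a version string into numeric components; for example:

from distutils.version import LooseVersion

lver = LooseVersion(u'2.4.3.0')
print(lver.version[0:2])                               # [2, 4]
print(u'.'.join([str(x) for x in lver.version[0:2]]))  # 2.4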
Example #34
def extract_template_data(body,
                          issue_number=None,
                          issue_class='issue',
                          sections=None,
                          find_extras=True):

    if sections is None:
        sections = SECTIONS

    # pointless to parse a null body
    if not body:
        return {}

    # simple find or fuzzy find the sections within the body
    tdict = find_sections(body) or fuzzy_find_sections(body, sections)
    if not tdict:
        return {}

    # lowercase the keys
    ndict = {}
    for k, v in six.iteritems(tdict):
        ku = k.lower()
        if ku == u'plugin name':
            ku = u'component name'
        ndict[ku] = v
    if ndict != tdict:
        tdict = ndict.copy()

    # make a raw component section for later processing
    component_raw = tdict.get(u'component name', u'')

    # https://github.com/ansible/ansibullbot/issues/359
    if u',' in tdict.get(u'component name', u''):
        tdict[u'component name'] = tdict[u'component name'].replace(
            u',', u'\n')

    # https://github.com/ansible/ansibullbot/issues/385
    if u' and ' in tdict.get(u'component name', u''):
        tdict[u'component name'] = tdict[u'component name'].replace(
            u' and ', u'\n')

    # cleanup the sections
    for k, v in six.iteritems(tdict):
        # remove markdown comments from the sections
        v = remove_markdown_comments(v)

        # remove non-ascii chars
        v = to_text(to_bytes(v, 'ascii', errors='ignore'), 'ascii')

        # normalize newlines and return chars
        v = v.replace(u'\r', u'\n')

        # remove leading and trailing newlines
        v = v.strip()

        # remove trailing hashes
        while v.endswith(u'#'):
            v = v[:-1]

        # remove leading and trailing newlines (AGAIN)
        v = v.strip()

        # clean more on critical sections
        if u'step' not in k and u'result' not in k:

            # https://github.com/ansible/ansible-modules-extras/issues/2262
            if k == u'component name':
                v = v.lower()

            if k == u'component name' and u'module' in v:
                if u'/modules/' in v or \
                        u'module_util' in v or \
                        u'module_utils/' in v or \
                        u'validate-modules' in v or\
                        u'module_common' in v:
                    # https://github.com/ansible/ansible/issues/20563
                    # https://github.com/ansible/ansible/issues/18179
                    pass
                else:
                    # some modules have the word "_module" in their name
                    # https://github.com/ansible/ansibullbot/issues/198
                    # https://github.com/ansible/ansible-modules-core/issues/4159
                    # https://github.com/ansible/ansible-modules-core/issues/5328
                    reg = re.compile(r'\S+_module')
                    match = reg.match(v)
                    if match:
                        v = v[match.start():match.end()]
                    else:
                        # https://github.com/ansible/ansibullbot/issues/385
                        if u'modules' in v:
                            v = v.replace(u'modules', u' ')
                        else:
                            v = v.replace(u'module', u' ')

            # remove useless chars
            v = clean_bad_characters(v)

            # clean up empty lines
            vlines = v.split(u'\n')
            vlines = [x for x in vlines if x.strip()]
            vlines = [x.strip() for x in vlines if x.strip()]
            v = u'\n'.join(vlines)

            # remove leading '-' or '*' chars
            for bc in [u'-', u'*']:
                if v:
                    if v[0] == bc:
                        v = v[1:]
                    v = v.strip()

            # keep just the first line for types and components
            if k in [u'issue type', u'component name']:
                if v:
                    vlines = v.split(u'\n')
                    # https://github.com/ansible/ansible-modules-core/issues/3085
                    vlines = [x for x in vlines if u'pick one' not in x]
                    v = vlines[0]

            # https://github.com/ansible/ansible-modules-core/issues/4060
            if k in [u'issue type']:
                if u'/' in v:
                    v = v.split(u'/')
                    if k == u'issue type':
                        v = v[0]
                    else:
                        v = v[-1]
                    v = v.strip()

            if issue_class == u'issue':
                if k == u'issue type' and v != u'bug report' and u'bug' in v.lower():
                    v = u'bug report'
                elif k == u'issue type' and v != u'feature idea' and u'feature' in v.lower():
                    v = u'feature idea'
            elif issue_class == u'pullrequest':
                if k == u'issue type' and v != u'bugfix pull request' and u'bug' in v.lower():
                    v = u'bugfix pull request'
                elif k == u'issue type' and v != u'feature pull request' and u'feature' in v.lower():
                    v = u'feature pull request'
                elif k == u'issue type' and v != u'new module pull request' and u'new module' in v.lower():
                    v = u'new module pull request'
                elif k == u'issue type' and v != u'docs pull request' and u'docs' in v.lower():
                    v = u'docs pull request'
                elif k == u'issue type' and v != u'test pull request' and u'test' in v.lower():
                    v = u'test pull request'

        # save
        tdict[k] = v

    # quick clean and add raw component to the dict
    component_raw = remove_markdown_comments(component_raw)
    component_raw = clean_bad_characters(component_raw, exclude=None)
    component_raw = u'\n'.join(
        [x.strip() for x in component_raw.split(u'\n') if x.strip()])
    component_raw = u'\n'.join(
        [x for x in component_raw.split(u'\n') if not x.startswith(u'#')])
    tdict[u'component_raw'] = component_raw

    return tdict
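A hedged sketch of what extract_template_data returns for a small issue body, assuming the section finders recognize the ##### headers; the exact whitespace handling depends on find_sections/fuzzy_find_sections:

body = u"""##### ISSUE TYPE
Bug Report

##### COMPONENT NAME
copy module

##### SUMMARY
copying fails
"""
td = extract_template_data(body, issue_number=12345)
# expected roughly:
#   td[u'issue type']      -> u'bug report'
#   td[u'component name']  -> u'copy'
#   td[u'component_raw']   -> u'copy module'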
Example #35
def get_last_shippable_full_run_date(ci_status, shippable):
    '''Map partial re-runs back to their last full run date'''

    # https://github.com/ansible/ansibullbot/issues/935

    # (Epdb) pp [x['target_url'] for x in ci_status]
    # [u'https://app.shippable.com/github/ansible/ansible/runs/67039/summary',
    # u'https://app.shippable.com/github/ansible/ansible/runs/67039/summary',
    # u'https://app.shippable.com/github/ansible/ansible/runs/67039',
    # u'https://app.shippable.com/github/ansible/ansible/runs/67037/summary',
    # u'https://app.shippable.com/github/ansible/ansible/runs/67037/summary',
    # u'https://app.shippable.com/github/ansible/ansible/runs/67037']

    if shippable is None:
        return None

    # extract and unique the run ids from the target urls
    runids = [get_runid_from_status(x) for x in ci_status]

    # get rid of duplicates and sort
    runids = sorted(set(runids))

    # always use the numerically higher run id
    runid = runids[-1]

    # build a datastructure to hold the info collected
    rundata = {
        u'runid': runid,
        u'created_at': None,
        u'rerun_batch_id': None,
        u'rerun_batch_createdat': None
    }

    # query the api for all data on this runid
    try:
        rdata = shippable.get_run_data(to_text(runid), usecache=False)
    except ShippableNoData:
        return None

    # whoops ...
    if rdata is None:
        return None

    # get the referenced run for the last runid if it exists
    pbag = rdata.get(u'propertyBag')
    if pbag:
        rundata[u'rerun_batch_id'] = pbag.get(u'originalRunId')

    # keep the timestamp too
    rundata[u'created_at'] = rdata.get(u'createdAt')

    # if it had a rerunbatchid it was a partial run and
    # we need to go get the date on the original run
    while rundata[u'rerun_batch_id']:
        # the original run data
        rjdata = shippable.get_run_data(rundata[u'rerun_batch_id'])
        # swap the timestamp
        rundata[u'rerun_batch_createdat'] = rundata[u'created_at']
        # get the old timestamp
        rundata[u'created_at'] = rjdata.get(u'createdAt')
        # get the new batchid
        #rundata['rerun_batch_id'] = rjdata.get('propertyBag', {}).get('originalRunId')
        pbag = rjdata.get(u'propertyBag')
        if pbag:
            rundata[u'rerun_batch_id'] = pbag.get(u'originalRunId')
        else:
            rundata[u'rerun_batch_id'] = None

    # return only the timestamp from the last full run
    return rundata[u'created_at']
Example #36
    def ansible_version_by_commit(self, commithash, config=None):

        # $ git branch --contains e620fed755a9c7e07df846b7deb32bbbf3164ac7
        # * devel

        #$ git branch -r --contains 6d9949698bd6a5693ef64cfde845c029f0e02b91 | egrep -e 'release' -e 'stable' | head
        #  origin/release1.5.0
        #  origin/release1.5.1
        #  origin/release1.5.2
        #  origin/release1.5.3
        #  origin/release1.5.4
        #  origin/release1.5.5
        #  origin/release1.6.0
        #  origin/release1.6.1
        #  origin/release1.6.10
        #  origin/release1.6.2
        '''
        # make sure the checkout cache is still valid
        self.update_checkout()
        '''

        aversion = None

        if not self.COMMITVERSIONS:
            self.COMMITVERSIONS = {}

        if commithash in self.COMMITVERSIONS:
            aversion = self.COMMITVERSIONS[commithash]
        else:

            # get devel's version
            devel_version = self._get_devel_version()

            cmd = u'cd %s;' % self.checkoutdir
            cmd += u'git branch -r --contains %s' % commithash
            (rc, so, se) = run_command(cmd)
            lines = (x.strip() for x in to_text(so).split(u'\n'))
            lines = list(filter(bool, lines))

            rlines = (x for x in lines
                      if x.startswith((u'origin/release', u'origin/stable')))
            rlines = (x.split(u'/')[-1] for x in rlines)
            rlines = (x.replace(u'release', u'') for x in rlines)
            rlines = [x.replace(u'stable-', u'') for x in rlines]

            if rc != 0:
                logging.error(u"rc != 0")
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb
                    epdb.st()
                else:
                    raise Exception(u'bad returncode')

            if len(rlines) > 0:
                aversion = rlines[0]
            else:
                if u'HEAD' in lines[0] or lines[0].endswith(u'/devel'):
                    '''
                    cmd = 'cd %s;' % self.checkoutdir
                    cmd += 'git branch -a | fgrep -e release -e stable | tail -n 1'
                    (rc, so, se) = run_command(cmd)
                    cver = so.strip()
                    cver = cver.replace('remotes/origin/stable-', '')
                    cver = cver.replace('remotes/upstream/stable-', '')
                    cver = cver.replace('remotes/origin/release', '')
                    cver = cver.replace('remotes/upstream/release', '')
                    assert cver, "cver is null"
                    assert cver[0].isdigit(), "cver[0] is not digit: %s" % cver
                    aversion = cver
                    '''
                    aversion = devel_version
                else:
                    logging.error(u"WTF!? ...")
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb
                        epdb.st()
                    else:
                        raise Exception(u'HEAD not found')

            self.COMMITVERSIONS[commithash] = aversion

        return aversion
Example #37
def fuzzy_find_sections(body, sections):
    upper_body = body.upper()

    # make a map of locations where each section starts
    match_map = {}
    for section in sections:
        # http://www.tutorialspoint.com/python/string_find.htm
        # str.find(str, beg=0 end=len(string))
        match = upper_body.find(section)
        if match != -1:
            match_map[section] = match

    if not match_map:
        return {}

    # what are the header(s) being used?
    headers = []
    for k, v in match_map.items():
        try:
            before = upper_body[v - 1]
            after = upper_body[v + len(k)]
            header = before + u'${section}' + after
            headers.append(header)
        except Exception as e:
            pass

    # pick the most common header and re-search with it
    if len(sorted(set(headers))) > 1:
        choices = sorted(set(headers))
        choice_totals = []
        for choice in choices:
            ctotal = len([x for x in headers if x == choice])
            choice_totals.append((ctotal, choice))
        choice_totals.sort(key=lambda tup: tup[0])
        sheader = choice_totals[-1][1]

        match_map = {}
        t = Template(sheader)
        for section in sections:
            try:
                tofind = t.substitute(section=section)
            except Exception as e:
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb
                    epdb.st()
                else:
                    raise Exception(u'substitution failed: %s' % to_text(e))
            match = upper_body.find(tofind)
            if match != -1:
                match_map[section] = match + 1

        # re-do for missing sections with less common header(s)
        for section in sections:
            if section in match_map:
                continue
            for choice in choices:
                t = Template(choice)
                tofind = t.substitute(section=section)
                match = upper_body.find(tofind)
                if match != -1:
                    match_map[section] = match + 1
                    break

    elif len(headers) <= 1:
        if headers and \
                (u'#' not in headers[0] and
                 u':' not in headers[0] and
                 u'*' not in headers[0]):
            return {}
        else:
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb
                epdb.st()

    # sort mapping by element id and inject itype if needed
    match_map = sorted(match_map.items(), key=operator.itemgetter(1))
    if match_map and u'ISSUE TYPE' not in [x[0] for x in match_map]:
        if match_map[0][1] > 10:
            match_map.insert(0, (u'ISSUE TYPE', 0))

    # extract the sections based on their indexes
    tdict = {}
    total_indexes = len(match_map) - 1
    for idx, x in enumerate(match_map):

        if x[1] > 0:
            start_index = x[1] + (len(x[0]))
        else:
            start_index = 0

        # if last index, slice to the end
        if idx >= total_indexes:
            tdict[x[0]] = body[start_index:]
        else:
            # slice to the next section
            stop_index = match_map[idx + 1][1]
            tdict[x[0]] = body[start_index:stop_index]

    return tdict
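The header guessing above turns the characters around a matched section name into a string.Template pattern and re-searches with it; the substitution step in isolation:

from string import Template

# the learned pattern is the single character before and after a matched name,
# e.g. ' ${section}\n' when headers look like '##### ISSUE TYPE'
t = Template(u' ${section}\n')
print(repr(t.substitute(section=u'COMPONENT NAME')))  # ' COMPONENT NAME\n'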
Example #38
    def process(self):
        """Merge all events into chronological order"""

        # FIXME - load this just once for later reference
        cache = self._load_cache()

        processed_events = []

        events = self.issue.events
        comments = self.issue.comments
        reactions = self.issue.reactions

        for ide, event in enumerate(events):

            if isinstance(event, dict):
                event = Event(
                    event,
                    id='%s_%s_%s' %
                    (self.issue.repo_full_name, self.issue.number, ide))

            cdict = self.get_event_from_cache(event.id, cache)

            if cdict:
                edict = cdict.copy()
            else:
                edict = {}
                edict[u'id'] = event.id
                if not hasattr(event.actor, u'login'):
                    edict[u'actor'] = None
                else:
                    edict[u'actor'] = event.actor.login
                edict[u'event'] = event.event
                edict[u'created_at'] = event.created_at

                if edict[u'event'] in [u'labeled', u'unlabeled']:
                    raw_data = self._raw_data_from_event(event)
                    edict[u'label'] = raw_data.get(u'label',
                                                   {}).get(u'name', None)
                elif edict[u'event'] == u'mentioned':
                    pass
                elif edict[u'event'] == u'subscribed':
                    pass
                elif edict[u'event'] == u'referenced':
                    edict[u'commit_id'] = event.commit_id
                elif edict[u'event'] == u'assigned':
                    edict[u'assignee'] = event.raw_data[u'assignee'][u'login']
                    edict[u'assigner'] = event.raw_data[u'assigner'][u'login']

            processed_events.append(edict)

        for comment in comments:
            edict = {
                u'id': comment.id,
                u'event': u'commented',
                u'actor': comment.user.login,
                u'created_at': comment.created_at,
                u'body': comment.body,
            }
            processed_events.append(edict)

        for reaction in reactions:
            # 2016-07-26T20:08:20Z
            if not isinstance(reaction, dict):
                # FIXME - not sure what's happening here
                pass
            else:
                edict = {
                    u'id': reaction[u'id'],
                    u'event': u'reacted',
                    u'created_at': reaction[u'created_at'],
                    u'actor': reaction[u'user'][u'login'],
                    u'content': reaction[u'content'],
                }

                if isinstance(edict[u'created_at'], six.binary_type):
                    edict[u'created_at'] = to_text(edict[u'created_at'])

                # convert the timestamp the same way the lib does it
                if isinstance(edict[u'created_at'], six.text_type):
                    edict[u'created_at'] = self.parse_timestamp(
                        edict[u'created_at'])

                processed_events.append(edict)

        # get rid of events with no created_at =(
        processed_events = [
            x for x in processed_events if x.get(u'created_at')
        ]

        # sort by created_at
        sorted_events = sorted(processed_events, key=itemgetter(u'created_at'))

        # return ...
        return sorted_events
Example #39
    def __init__(self, issue, usecache=True, cachedir=None, exclude_users=[]):

        self.issue = issue
        self.maincache = cachedir
        self._waffled_labels = None

        if issue.repo.repo_path not in cachedir and u'issues' not in cachedir:
            self.cachefile = os.path.join(self.maincache, issue.repo.repo_path,
                                          u'issues',
                                          to_text(issue.instance.number),
                                          u'history.pickle')
        elif issue.repo.repo_path not in cachedir:
            self.cachefile = os.path.join(self.maincache, issue.repo.repo_path,
                                          u'issues',
                                          to_text(issue.instance.number),
                                          u'history.pickle')
        elif u'issues' not in cachedir:
            self.cachefile = os.path.join(self.maincache, u'issues',
                                          to_text(issue.instance.number),
                                          u'history.pickle')
        else:
            self.cachefile = os.path.join(self.maincache,
                                          to_text(issue.instance.number),
                                          u'history.pickle')

        self.cachedir = os.path.join(self.maincache,
                                     os.path.dirname(self.cachefile))
        if u'issues' not in self.cachedir:
            logging.error(self.cachedir)
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb
                epdb.st()
            else:
                raise Exception(u'')

        if not usecache:
            self.history = self.process()
        else:
            """Building history is expensive and slow"""
            cache = self._load_cache()
            if not cache:
                logging.info(u'empty history cache, rebuilding')
                self.history = self.process()
                logging.info(u'dumping newly created history cache')
                self._dump_cache()
            else:

                reprocess = False

                # use a versioned schema to track changes
                if not cache.get(
                        'version') or cache['version'] < self.SCHEMA_VERSION:
                    reprocess = True

                if cache[u'updated_at'] < self.issue.instance.updated_at:
                    reprocess = True

                if reprocess:
                    logging.info(u'history out of date, updating')
                    self.history = self.process()
                    logging.info(u'dumping newly created history cache')
                    self._dump_cache()
                else:
                    logging.info(u'use cached history')
                    self.history = cache[u'history']

        if exclude_users:
            tmp_history = [x for x in self.history]
            for x in tmp_history:
                if x[u'actor'] in exclude_users:
                    self.history.remove(x)

        self.fix_history_tz()
        self.history = self._fix_comments_with_no_body(self.history)
        self.history = self._fix_commits_with_no_message(self.history)
        self.history = sorted(self.history, key=itemgetter(u'created_at'))
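The constructor above rebuilds the history whenever the cache is missing, the schema version is older than the code's, or the issue was updated after the cache was written. A hedged sketch of just that staleness test; SCHEMA_VERSION and the cache layout mirror the snippet but are assumptions here:

SCHEMA_VERSION = 1.2

def cache_is_stale(cache, issue_updated_at):
    if not cache:
        return True
    # versioned schema: older caches are always rebuilt
    if not cache.get('version') or cache['version'] < SCHEMA_VERSION:
        return True
    # the issue changed after the cache was dumped
    if cache['updated_at'] < issue_updated_at:
        return True
    return False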
Example #40
0
    def parse_yaml(data):
        def clean_list_items(inlist):
            if isinstance(inlist, list):
                inlist = to_text(inlist)

            inlist = inlist.replace("[", '')
            inlist = inlist.replace("]", '')
            inlist = inlist.replace("'", '')
            inlist = inlist.replace(",", '')
            inlist = inlist.split()
            return inlist

        def join_if_list(list_or_str):
            if not isinstance(list_or_str, list):
                return list_or_str

            return ' '.join(list_or_str)

        def fix_lists(data):
            string_macros = {
                k: join_if_list(v)
                for k, v in data['macros'].items()
            }
            for k, v in data['files'].items():
                if v is None:
                    continue

                for k2, v2 in v.items():
                    if isinstance(v2, str) and '$' in v2:
                        tmpl = Template(v2)
                        newv2 = tmpl.substitute(**string_macros)
                        newv2 = clean_list_items(newv2)
                        data['files'][k][k2] = newv2
                        v2 = newv2

                    if isinstance(v2, str):
                        data['files'][k][k2] = v2.split()

            return data

        def fix_keys(data):
            replace = []
            for k in data['files'].keys():
                if '$' in k:
                    replace.append(k)
            for x in replace:
                tmpl = Template(x)
                newkey = tmpl.substitute(**data['macros'])
                data['files'][newkey] = data['files'][x]
                data['files'].pop(x, None)

            paths = list(data['files'].keys())
            for p in paths:
                normpath = os.path.normpath(p)
                if p != normpath:
                    metadata = data['files'].pop(p)
                    data['files'][normpath] = metadata
            return data

        def extend_labels(data):
            for k, v in data['files'].items():
                # labels from path(s)
                if v is None:
                    continue
                labels = v.get('labels', [])
                if isinstance(labels, str):
                    labels = labels.split()
                    labels = [x.strip() for x in labels if x.strip()]
                path_labels = [x.strip() for x in k.split('/') if x.strip()]
                for x in path_labels:
                    x = x.replace('.py', '')
                    x = x.replace('.ps1', '')
                    if x not in labels:
                        labels.append(x)
                data['files'][k]['labels'] = sorted(set(labels))

            return data

        def fix_teams(data):
            for k, v in data['macros'].items():
                if v is None:
                    continue
                if not k.startswith('team_') or isinstance(v, list):
                    continue
                names = v.split()
                data['macros'][k] = names
            return data

        def _propagate(files, top, child, field, multivalued=True):
            '''Copy key named 'field' from top to child
            - with multivalued, child inherits from all ancestors
            - else child inherits from the nearest ancestor and only if field is
              not already set at child level
            '''
            top_entries = files[top].get(field, [])
            if top_entries:
                if field not in files[child]:
                    files[child][field] = []

                # track the origin of the data
                field_keys = '%s_keys' % field
                if field_keys not in files[child]:
                    files[child][field_keys] = []

                if multivalued:
                    files[child][field_keys].append(top)
                    for entry in top_entries:
                        if entry not in files[child][field]:
                            files[child][field].append(entry)
                elif not files[child][field] or (
                        files[child][field_keys]
                        and len(files[child][field_keys][0]) < len(top)):
                    # use parent keyword only if:
                    # 1. either keyword is not set
                    # 2. or keyword has been already inherited from a less specific path
                    files[child][field_keys] = [top]
                    files[child][field] = top_entries[:]

        def propagate_keys(data):
            '''maintainers and ignored keys defined at a directory level are copied to subpath'''

            files = data['files']
            iterfiles = compute_file_children(files.keys())

            for file1, files2 in iterfiles.items():
                for file2 in files2:
                    top = min(file1, file2)
                    child = max(file1, file2)

                    _propagate(files, top, child, 'maintainers')
                    _propagate(files, top, child, 'ignored')
                    _propagate(files, top, child, 'labels')
                    _propagate(files, top, child, 'support', multivalued=False)
                    _propagate(files,
                               top,
                               child,
                               'supported_by',
                               multivalued=False)

        #################################
        #   PARSE
        #################################

        # https://github.com/ansible/ansibullbot/issues/1155#issuecomment-457731630
        logging.info('botmeta: load yaml')
        ydata_orig = yaml.load(data, BotYAMLLoader)
        ydata = yaml.load(yaml.dump(ydata_orig, Dumper=NoAliasDumper),
                          BotYAMLLoader)

        # fix the team macros
        logging.info('botmeta: fix teams')
        ydata = fix_teams(ydata)

        # fix the macro'ized file keys
        logging.info('botmeta: fix keys')
        ydata = fix_keys(ydata)

        logging.info('botmeta: iterate files')
        for k, v in ydata['files'].items():
            if v is None:
                # convert empty val in dict
                ydata['files'][k] = {}
                continue

            if isinstance(v, bytes):
                v = to_text(v)

            if isinstance(v, str):
                # convert string vals to a maintainers key in a dict
                ydata['files'][k] = {'maintainers': v}

            ydata['files'][k]['maintainers_keys'] = [k]

        # replace macros in files section
        logging.info('botmeta: fix lists')
        ydata = fix_lists(ydata)

        # extend labels by filepath
        logging.info('botmeta: extend labels')
        ydata = extend_labels(ydata)

        # key inheritance
        logging.info('botmeta: propagate keys')
        propagate_keys(ydata)

        return ydata
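fix_teams, fix_keys and fix_lists above all rely on string.Template to expand $-style macros in BOTMETA keys and values. A small stand-alone sketch of that expansion, using made-up macro names and file entries:

from string import Template

macros = {'team_example': 'alice bob', 'modules': 'lib/ansible/modules'}
files = {'$modules/cloud/': {'maintainers': '$team_example carol'}}

# expand macros used inside the file keys
expanded = {}
for key, meta in files.items():
    expanded[Template(key).substitute(**macros)] = meta

# expand macros inside the values and split them into lists
for meta in expanded.values():
    meta['maintainers'] = Template(meta['maintainers']).substitute(**macros).split()

print(expanded)
# {'lib/ansible/modules/cloud/': {'maintainers': ['alice', 'bob', 'carol']}}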
Example #41
0
    def get_issue_summaries(self, repo_url, baseurl=None, cachefile=None):
        '''Paginate through github's web interface and scrape summaries'''

        # repo_url - https://github.com/ansible/ansible for example
        # baseurl - an entrypoint for one-off utils to scrape specific issue
        #           query urls. NOTE: this disables writing a cache

        # get cached
        if not baseurl:
            issues = self.load_summaries(repo_url)
        else:
            issues = {}

        if not baseurl:
            url = repo_url
            url += '/issues'
            url += '?'
            url += 'q='
            url += urllib.parse.quote('sort:updated-desc')
        else:
            url = baseurl

        namespace = repo_url.split('/')[-2]
        reponame = repo_url.split('/')[-1]

        rr = self._request_url(url)
        soup = BeautifulSoup(rr.text, 'html.parser')
        data = self._parse_issue_summary_page(soup)
        if data['issues']:
            # send to receiver
            post_to_receiver('html_summaries', {
                'user': namespace,
                'repo': reponame
            }, data['issues'])
            # update master list
            issues.update(data['issues'])

        if not baseurl:
            self.dump_summaries_tmp(repo_url, issues)

        while data['next_page']:
            rr = self._request_url(self.baseurl + data['next_page'])
            soup = BeautifulSoup(rr.text, 'html.parser')
            data = self._parse_issue_summary_page(soup)

            # send to receiver
            post_to_receiver('html_summaries', {
                'user': namespace,
                'repo': reponame
            }, data['issues'])

            if not data['next_page'] or not data['issues']:
                break

            changed = []
            changes = False
            for k, v in data['issues'].items():

                if not isinstance(k, str):
                    k = '%s' % k

                if k not in issues:
                    changed.append(k)
                    changes = True
                elif v != issues[k]:
                    changed.append(k)
                    changes = True
                issues[k] = v

            if changed:
                logging.info('changed: %s' % ','.join(x for x in changed))

            if not baseurl:
                self.dump_summaries_tmp(repo_url, issues)

            if not changes:
                break

        # get missing
        if not baseurl:
            numbers = sorted([int(x) for x in issues.keys()])
            missing = [x for x in range(1, numbers[-1]) if x not in numbers]
            for x in missing:
                summary = self.get_single_issue_summary(repo_url,
                                                        x,
                                                        force=True)
                if summary:
                    post_to_receiver('html_summaries', {
                        'user': namespace,
                        'repo': reponame
                    }, {x: summary})
                    if not isinstance(x, str):
                        x = '%s' % x
                    issues[x] = summary

        # get missing timestamps
        if not baseurl:
            numbers = sorted([int(x) for x in issues.keys()])
            missing = [
                x for x in numbers if to_text(x) not in issues
                or not issues[to_text(x)]['updated_at']
            ]
            for x in missing:
                summary = self.get_single_issue_summary(repo_url,
                                                        x,
                                                        force=True)
                if summary:
                    post_to_receiver('html_summaries', {
                        'user': namespace,
                        'repo': reponame
                    }, {x: summary})
                    if not isinstance(x, str):
                        x = '%s' % x
                    issues[x] = summary

        # save the cache
        if not baseurl:
            self.dump_summaries(repo_url, issues)

        return issues
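The "get missing" pass above fills gaps in the scraped summaries, which are keyed by string issue numbers. A minimal sketch of the gap computation with fabricated data:

issues = {'1': {'state': 'open'}, '2': {'state': 'closed'}, '5': {'state': 'open'}}

numbers = sorted(int(x) for x in issues)
missing = [x for x in range(1, numbers[-1]) if x not in numbers]
print(missing)  # [3, 4]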
Example #42
0
    def get_module_commits(self):
        keys = self.modules.keys()
        keys = sorted(keys)
        for k in keys:
            self.commits[k] = []
            cpath = os.path.join(self.gitrepo.checkoutdir, k)
            if not os.path.isfile(cpath):
                continue

            mtime = os.path.getmtime(cpath)
            refresh = False
            pfile = os.path.join(self.scraper_cache,
                                 k.replace(u'/', u'_') + u'.commits.pickle')

            if not os.path.isfile(pfile):
                refresh = True
            else:
                pickle_kwargs = {'encoding': 'bytes'} if six.PY3 else {}
                logging.debug(pfile)
                with open(pfile, 'rb') as f:
                    pdata = pickle_load(f, **pickle_kwargs)
                if pdata[0] == mtime:
                    self.commits[k] = pdata[1]
                else:
                    refresh = True

            if refresh:
                logging.info(u'refresh commit cache for %s' % k)
                cmd = u'cd %s; git log --follow %s' % (
                    self.gitrepo.checkoutdir, k)
                (rc, so, se) = run_command(cmd)
                for line in to_text(so).split(u'\n'):
                    if line.startswith(u'commit '):
                        commit = {
                            u'name': None,
                            u'email': None,
                            u'login': None,
                            u'hash': line.split()[-1],
                            u'date': None
                        }

                    # Author: Matt Clay <*****@*****.**>
                    if line.startswith(u'Author: '):
                        line = line.replace(u'Author: ', u'')
                        line = line.replace(u'<', u'')
                        line = line.replace(u'>', u'')
                        lparts = line.split()

                        if u'@' in lparts[-1]:
                            commit[u'email'] = lparts[-1]
                            commit[u'name'] = u' '.join(lparts[:-1])
                        else:
                            pass

                        if commit[u'email'] and \
                                u'noreply.github.com' in commit[u'email']:
                            commit[u'login'] = commit[u'email'].split(u'@')[0]

                    # Date:   Sat Jan 28 23:28:53 2017 -0800
                    if line.startswith(u'Date:'):
                        dstr = line.split(u':', 1)[1].strip()
                        dstr = u' '.join(dstr.split(u' ')[:-1])
                        ds = datetime.datetime.strptime(
                            to_text(dstr), u'%a %b %d %H:%M:%S %Y')
                        commit[u'date'] = ds
                        self.commits[k].append(commit)

                with open(pfile, 'wb') as f:
                    pickle_dump((mtime, self.commits[k]), f)
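The commit scraper above only re-runs git log when the module file's mtime no longer matches the one recorded in the pickle. A generic, hedged sketch of that mtime-gated cache; compute stands in for the expensive parse:

import os
import pickle

def cached_compute(path, cachefile, compute):
    mtime = os.path.getmtime(path)
    if os.path.isfile(cachefile):
        with open(cachefile, 'rb') as f:
            cached_mtime, data = pickle.load(f)
        if cached_mtime == mtime:
            # the source file has not changed, trust the cache
            return data
    data = compute(path)
    with open(cachefile, 'wb') as f:
        pickle.dump((mtime, data), f)
    return data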
Example #43
0
    def clean_issue_cache(self, number):
        # https://github.com/ansible/ansibullbot/issues/610
        cdir = os.path.join(self.cachedir, u'issues', to_text(number))
        shutil.rmtree(cdir)
Example #44
0
def get_template_data(iw):
    """Extract templated data from an issue body"""

    if iw.is_issue():
        tfile = u'.github/ISSUE_TEMPLATE/bug_report.md'
    else:
        tfile = u'.github/PULL_REQUEST_TEMPLATE.md'

    # use the fileindexer whenever possible to conserve ratelimits
    if iw.gitrepo:
        tf_content = iw.gitrepo.get_file_content(tfile)
    else:
        try:
            tf = iw.repo.get_file_contents(tfile)
            tf_content = tf.decoded_content
        except Exception:
            logging.warning(u'repo does not have {}'.format(tfile))
            tf_content = u''

    # pull out the section names from the template
    tf_sections = extract_template_sections(tf_content, header=TEMPLATE_HEADER)

    # what is required?
    iw._required_template_sections = \
        [x.lower() for x in tf_sections.keys()
            if tf_sections[x][u'required']]

    # extract ...
    template_data = \
        extract_template_data(
            iw.instance.body,
            issue_number=iw.number,
            issue_class=iw.github_type,
            sections=tf_sections.keys()
        )

    # try comments if the description was insufficient
    if len(template_data.keys()) <= 2:
        s_comments = iw.history.get_user_comments(iw.submitter)
        for s_comment in s_comments:

            _template_data = extract_template_data(s_comment,
                                                   issue_number=iw.number,
                                                   issue_class=iw.github_type,
                                                   sections=tf_sections.keys())

            if _template_data:
                for k, v in _template_data.items():
                    if not v:
                        continue
                    if v and (k not in template_data
                              or not template_data.get(k)):
                        template_data[k] = v

    if u'ANSIBLE VERSION' in tf_sections and u'ansible version' not in template_data:

        # FIXME - abstract this into a historywrapper method
        vlabels = [x for x in iw.history.history if x[u'event'] == u'labeled']
        vlabels = [
            x for x in vlabels
            if x[u'actor'] not in [u'ansibot', u'ansibotdev']
        ]
        vlabels = [
            x[u'label'] for x in vlabels if x[u'label'].startswith(u'affects_')
        ]

        versions = [x.split(u'_')[1] for x in vlabels]
        versions = [float(x) for x in versions]
        if versions:
            version = versions[-1]
            template_data[u'ansible version'] = to_text(version)

    if u'COMPONENT NAME' in tf_sections and u'component name' not in template_data:
        if iw.is_pullrequest():
            fns = iw.files
            if fns:
                template_data[u'component name'] = u'\n'.join(fns)
                template_data[u'component_raw'] = u'\n'.join(fns)
        else:
            clabels = [x for x in iw.labels if x.startswith(u'c:')]
            if clabels:
                fns = []
                for clabel in clabels:
                    clabel = clabel.replace(u'c:', u'')
                    fns.append(u'lib/ansible/' + clabel)
                template_data[u'component name'] = u'\n'.join(fns)
                template_data[u'component_raw'] = u'\n'.join(fns)

            elif u'documentation' in template_data.get(u'issue type',
                                                       u'').lower():
                template_data[u'component name'] = u'docs'
                template_data[u'component_raw'] = u'docs'

    if u'ISSUE TYPE' in tf_sections and u'issue type' not in template_data:

        # FIXME - turn this into a real classifier based on work done in
        # jctanner/pr-triage repo.

        itype = None

        while not itype:

            for label in iw.labels:
                if label.startswith(u'bug'):
                    itype = u'bug'
                    break
                elif label.startswith(u'feature'):
                    itype = u'feature'
                    break
                elif label.startswith(u'doc'):
                    itype = u'docs'
                    break
            if itype:
                break

            if iw.is_pullrequest():
                fns = iw.files
                for fn in fns:
                    if fn.startswith(u'doc'):
                        itype = u'docs'
                        break
            if itype:
                break

            msgs = [iw.title, iw.body]
            if iw.is_pullrequest():
                msgs += [
                    x[u'message'] for x in iw.history.history
                    if x[u'event'] == u'committed'
                ]

            msgs = [x for x in msgs if x]
            msgs = [x.lower() for x in msgs]

            for msg in msgs:
                if u'fix' in msg:
                    itype = u'bug'
                    break
                if u'addresses' in msg:
                    itype = u'bug'
                    break
                if u'broke' in msg:
                    itype = u'bug'
                    break
                if u'add' in msg:
                    itype = u'feature'
                    break
                if u'should' in msg:
                    itype = u'feature'
                    break
                if u'please' in msg:
                    itype = u'feature'
                    break
                if u'feature' in msg:
                    itype = u'feature'
                    break

            # quit now
            break

        if itype and itype == u'bug' and iw.is_issue():
            template_data[u'issue type'] = u'bug report'
        elif itype and itype == u'bug' and not iw.is_issue():
            template_data[u'issue type'] = u'bugfix pullrequest'
        elif itype and itype == u'feature' and iw.is_issue():
            template_data[u'issue type'] = u'feature idea'
        elif itype and itype == u'feature' and not iw.is_issue():
            template_data[u'issue type'] = u'feature pullrequest'
        elif itype and itype == u'docs' and iw.is_issue():
            template_data[u'issue type'] = u'documentation report'
        elif itype and itype == u'docs' and not iw.is_issue():
            template_data[u'issue type'] = u'documentation pullrequest'

    return template_data
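When the template has no ANSIBLE VERSION section, the function above falls back to affects_* labels applied by humans and takes the most recently applied one. A stand-alone sketch of that fallback with a fabricated event list:

label_events = [
    {'event': 'labeled', 'actor': 'someuser', 'label': 'affects_2.4'},
    {'event': 'labeled', 'actor': 'ansibot', 'label': 'affects_2.5'},
    {'event': 'labeled', 'actor': 'someuser', 'label': 'needs_info'},
]

versions = [
    float(x['label'].split('_')[1]) for x in label_events
    if x['event'] == 'labeled'
    and x['actor'] not in ('ansibot', 'ansibotdev')
    and x['label'].startswith('affects_')
]
if versions:
    ansible_version = str(versions[-1])
    print(ansible_version)  # 2.4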
Example #45
0
    def parse_yaml(data):

        def clean_list_items(inlist):
            if isinstance(inlist, list):
                inlist = to_text(inlist)
            if u'&' in inlist:
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb; epdb.st()
            inlist = inlist.replace(u"[", u'')
            inlist = inlist.replace(u"]", u'')
            inlist = inlist.replace(u"'", u'')
            inlist = inlist.replace(u",", u'')
            inlist = inlist.split()
            return inlist

        def join_if_list(list_or_str):
            if not isinstance(list_or_str, list):
                return list_or_str

            return u' '.join(list_or_str)

        def fix_lists(data):
            string_macros = {
                k: join_if_list(v)
                for k, v in data[u'macros'].items()
            }
            for k, v in data[u'files'].items():
                if v is None:
                    continue

                for k2, v2 in v.items():
                    if isinstance(v2, six.text_type) and u'$' in v2:
                        tmpl = Template(v2)
                        newv2 = tmpl.substitute(**string_macros)
                        newv2 = clean_list_items(newv2)
                        data[u'files'][k][k2] = newv2
                        v2 = newv2

                    if isinstance(v2, six.text_type):
                        data[u'files'][k][k2] = v2.split()

            return data

        def fix_keys(data):
            replace = []
            for k in data[u'files'].keys():
                if u'$' in k:
                    replace.append(k)
            for x in replace:
                tmpl = Template(x)
                newkey = tmpl.substitute(**data[u'macros'])
                data[u'files'][newkey] = data[u'files'][x]
                data[u'files'].pop(x, None)

            paths = list(data[u'files'].keys())
            for p in paths:
                normpath = os.path.normpath(p)
                if p != normpath:
                    metadata = data[u'files'].pop(p)
                    data[u'files'][normpath] = metadata
            return data

        def extend_labels(data):
            for k, v in data[u'files'].items():
                # labels from path(s)
                if v is None:
                    continue
                labels = v.get(u'labels', [])
                if isinstance(labels, six.text_type):
                    labels = labels.split()
                    labels = [x.strip() for x in labels if x.strip()]
                path_labels = [x.strip() for x in k.split(u'/') if x.strip()]
                for x in path_labels:
                    x = x.replace(u'.py', u'')
                    x = x.replace(u'.ps1', u'')
                    if x not in labels:
                        labels.append(x)
                data[u'files'][k][u'labels'] = sorted(set(labels))

            return data

        def fix_teams(data):
            for k, v in data[u'macros'].items():
                if v is None:
                    continue
                if not k.startswith(u'team_') or isinstance(v, list):
                    continue
                names = v.split()
                data[u'macros'][k] = names
            return data

        def _propagate(files, top, child, field, multivalued=True):
            '''Copy key named 'field' from top to child
            - with multivalued, child inherits from all ancestors
            - else child inherits from the nearest ancestor and only if field is
              not already set at child level
            '''
            top_entries = files[top].get(field, [])
            if top_entries:
                if field not in files[child]:
                    files[child][field] = []

                # track the origin of the data
                field_keys = u'%s_keys' % field
                if field_keys not in files[child]:
                    files[child][field_keys] = []

                if multivalued:
                    files[child][field_keys].append(top)
                    for entry in top_entries:
                        if entry not in files[child][field]:
                            files[child][field].append(entry)
                elif not files[child][field] or (files[child][field_keys] and len(files[child][field_keys][0]) < len(top)):
                    # use parent keyword only if:
                    # 1. either keyword is not set
                    # 2. or keyword has been already inherited from a less specific path
                    files[child][field_keys] = [top]
                    files[child][field] = top_entries[:]

        def propagate_keys(data):
            '''maintainers and ignored keys defined at a directory level are copied to subpath'''
            files = data[u'files']
            for file1, file2 in itertools.combinations(files.keys(), 2):
                # Python 2.7 doesn't provide os.path.commonpath
                common = os.path.commonprefix([file1, file2])
                top = min(file1, file2)
                child = max(file1, file2)

                top_components = top.split(u'/')
                child_components = child.split(u'/')

                if common == top and top_components == child_components[:len(top_components)]:
                    _propagate(files, top, child, u'maintainers')
                    _propagate(files, top, child, u'ignored')
                    _propagate(files, top, child, u'labels')
                    _propagate(files, top, child, u'support', multivalued=False)
                    _propagate(files, top, child, u'supported_by', multivalued=False)

        #################################
        #   PARSE
        #################################

        # https://github.com/ansible/ansibullbot/issues/1155#issuecomment-457731630
        ydata_orig = yaml.load(data, BotYAMLLoader)
        ydata = yaml.load(yaml.dump(ydata_orig, Dumper=NoAliasDumper), BotYAMLLoader)

        # fix the team macros
        ydata = fix_teams(ydata)

        # fix the macro'ized file keys
        ydata = fix_keys(ydata)

        for k, v in ydata[u'files'].items():
            if v is None:
                # convert empty val in dict
                ydata[u'files'][k] = {}
                continue

            if isinstance(v, six.binary_type):
                v = to_text(v)

            if isinstance(v, six.text_type):
                # convert string vals to a maintainers key in a dict
                ydata[u'files'][k] = {
                    u'maintainers': v
                }

            ydata[u'files'][k][u'maintainers_keys'] = [k]

        # replace macros in files section
        ydata = fix_lists(ydata)

        # extend labels by filepath
        ydata = extend_labels(ydata)

        propagate_keys(ydata)

        return ydata
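propagate_keys above pushes directory-level metadata down to every more specific path beneath it. A minimal sketch of that inheritance with a fabricated files dict, propagating only maintainers:

import itertools
import os

files = {
    'lib/ansible/modules/cloud': {'maintainers': ['alice']},
    'lib/ansible/modules/cloud/amazon/ec2.py': {'maintainers': ['bob']},
}

for file1, file2 in itertools.combinations(files.keys(), 2):
    top, child = min(file1, file2), max(file1, file2)
    top_parts = top.split('/')
    # child inherits only when top is a real path prefix of it
    if os.path.commonprefix([file1, file2]) == top \
            and child.split('/')[:len(top_parts)] == top_parts:
        for name in files[top]['maintainers']:
            if name not in files[child]['maintainers']:
                files[child]['maintainers'].append(name)

print(files['lib/ansible/modules/cloud/amazon/ec2.py']['maintainers'])
# ['bob', 'alice']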
Example #46
0
    def get_summaries(self,
                      owner,
                      repo,
                      otype='issues',
                      last=None,
                      first='first: 100',
                      states='states: OPEN',
                      paginate=True):
        """Collect all the summary data for issues or pullreuests

        Args:
            owner     (str): the github namespace
            repo      (str): the github repository
            otype     (str): issues or pullRequests
            first     (str): number of nodes per page, oldest to newest
            last      (str): number of nodes per page, newest to oldest
            states    (str): open or closed issues
            paginate (bool): recurse through page results

        """

        templ = self.environment.from_string(QUERY_TEMPLATE)

        # after: "$endCursor"
        after = None
        '''
        # first: 100
        first = 'first: 100'
        # states: OPEN
        states = 'states: OPEN'
        '''

        nodes = []
        pagecount = 0
        while True:
            logging.debug(u'%s/%s %s pagecount:%s nodecount: %s' %
                          (owner, repo, otype, pagecount, len(nodes)))

            issueparams = u', '.join(
                [x for x in [states, first, last, after] if x])
            query = templ.render(OWNER=owner,
                                 REPO=repo,
                                 OBJECT_TYPE=otype,
                                 OBJECT_PARAMS=issueparams,
                                 FIELDS=QUERY_FIELDS)

            payload = {
                #u'query': to_bytes(query, 'ascii', 'ignore').strip(),
                u'query': to_text(query, 'ascii', 'ignore').strip(),
                u'variables': u'{}',
                u'operationName': None
            }
            rr = requests.post(self.baseurl,
                               headers=self.headers,
                               data=json.dumps(payload))
            if not rr.ok:
                break
            data = rr.json()
            if not data:
                break

            # keep each edge/node/issue
            for edge in data[u'data'][u'repository'][otype][u'edges']:
                node = edge[u'node']
                self.update_node(node, otype.lower()[:-1], owner, repo)
                nodes.append(node)

            if not paginate:
                break

            pageinfo = data.get(u'data', {}).get(u'repository',
                                                 {}).get(otype,
                                                         {}).get(u'pageInfo')
            if not pageinfo:
                break
            if not pageinfo.get(u'hasNextPage'):
                break

            after = u'after: "%s"' % pageinfo[u'endCursor']
            pagecount += 1

        return nodes
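The pagination above works by joining the optional GraphQL clauses into a single argument string and feeding the endCursor from pageInfo back in as an after: clause. A hedged sketch of that assembly; the cursor value is illustrative only:

def build_params(states='states: OPEN', first='first: 100', last=None, after=None):
    return ', '.join(x for x in (states, first, last, after) if x)

print(build_params())
# states: OPEN, first: 100
print(build_params(after='after: "Y3Vyc29y..."'))
# states: OPEN, first: 100, after: "Y3Vyc29y..."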
Example #47
0
    def _collect_repo(self, repo, issuenums=None):
        '''Collect issues for an individual repo'''
        logging.info('getting repo obj for %s' % repo)
        if repo not in self.repos:
            gitrepo = GitRepoWrapper(
                cachedir=self.cachedir_base,
                repo=f'https://github.com/{repo}',
                commit=self.args.ansible_commit,
            )
            self.repos[repo] = {
                'repo': self.ghw.get_repo(repo),
                'issues': [],
                'processed': [],
                'since': None,
                'stale': [],
                'loopcount': 0,
                'labels': self.ghw.get_valid_labels(repo),
                'gitrepo': gitrepo,
            }
        else:
            # force a clean repo object to limit caching problems
            logging.info('updating repo')
            self.repos[repo]['repo'] = self.ghw.get_repo(repo)
            logging.info('updating checkout')
            self.repos[repo]['gitrepo'].update()

            # clear the issues
            self.repos[repo]['issues'] = {}
            # increment the loopcount
            self.repos[repo]['loopcount'] += 1

        logging.info('getting issue objs for %s' % repo)
        self.update_issue_summaries(repopath=repo, issuenums=issuenums)

        issuecache = {}
        numbers = self.issue_summaries[repo].keys()
        numbers = {int(x) for x in numbers}
        if issuenums:
            numbers.intersection_update(issuenums)
            numbers = list(numbers)
        logging.info('%s known numbers' % len(numbers))

        if self.args.daemonize:

            if not self.repos[repo]['since']:
                ts = [
                    x[1]['updated_at']
                    for x in self.issue_summaries[repo].items()
                    if x[1]['updated_at']
                ]
                ts += [
                    x[1]['created_at']
                    for x in self.issue_summaries[repo].items()
                    if x[1]['created_at']
                ]
                ts = sorted(set(ts))
                if ts:
                    self.repos[repo]['since'] = ts[-1]
            else:
                since = strip_time_safely(self.repos[repo]['since'])
                api_since = self.repos[repo]['repo'].get_issues(since=since)

                numbers = []
                for x in api_since:
                    numbers.append(x.number)
                    issuecache[x.number] = x

                numbers = sorted({int(n) for n in numbers})
                logging.info('%s numbers after [api] since == %s' %
                             (len(numbers), since))

                for k, v in self.issue_summaries[repo].items():
                    if v['created_at'] is None:
                        # issue is closed and was never processed
                        continue

                    if v['created_at'] > self.repos[repo]['since']:
                        numbers.append(k)

                numbers = sorted({int(n) for n in numbers})
                logging.info('%s numbers after [www] since == %s' %
                             (len(numbers), since))

        if self.args.start_at and self.repos[repo]['loopcount'] == 0:
            numbers = [x for x in numbers if x <= self.args.start_at]
            logging.info('%s numbers after start-at' % len(numbers))

        # Get stale numbers if not targeting
        if self.args.daemonize and self.repos[repo]['loopcount'] > 0:
            logging.info('checking for stale numbers')
            stale = self.get_stale_numbers(repo)
            self.repos[repo]['stale'] = [int(x) for x in stale]
            numbers += [int(x) for x in stale]
            numbers = sorted(set(numbers))
            logging.info('%s numbers after stale check' % len(numbers))

        ################################################################
        # PRE-FILTERING TO PREVENT EXCESSIVE API CALLS
        ################################################################

        # filter just the open numbers
        if not self.args.only_closed and not self.args.ignore_state:
            numbers = [
                x for x in numbers
                if (to_text(x) in self.issue_summaries[repo] and
                    self.issue_summaries[repo][to_text(x)]['state'] == 'open')
            ]
            logging.info('%s numbers after checking state' % len(numbers))

        # filter by type
        if self.args.only_issues:
            numbers = [
                x for x in numbers
                if self.issue_summaries[repo][to_text(x)]['type'] == 'issue'
            ]
            logging.info('%s numbers after checking type' % len(numbers))
        elif self.args.only_prs:
            numbers = [
                x for x in numbers if self.issue_summaries[repo][to_text(x)]
                ['type'] == 'pullrequest'
            ]
            logging.info('%s numbers after checking type' % len(numbers))

        numbers = sorted({int(x) for x in numbers})
        if self.args.sort == 'desc':
            numbers = [x for x in reversed(numbers)]

        if self.args.last and len(numbers) > self.args.last:
            numbers = numbers[0 - self.args.last:]

        # Use iterator to avoid requesting all issues upfront
        self.repos[repo]['issues'] = RepoIssuesIterator(
            self.repos[repo]['repo'], numbers, issuecache=issuecache)

        logging.info('getting repo objs for %s complete' % repo)
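The pre-filtering step above trims the candidate numbers using only the scraped summaries, before any API objects are fetched. A small stand-alone sketch with a made-up summaries dict:

summaries = {
    '1': {'state': 'open', 'type': 'issue'},
    '2': {'state': 'closed', 'type': 'issue'},
    '3': {'state': 'open', 'type': 'pullrequest'},
}
numbers = [1, 2, 3]

# keep only open items
numbers = [x for x in numbers
           if str(x) in summaries and summaries[str(x)]['state'] == 'open']
# optionally keep only issues (mirroring the only_issues option)
only_issues = [x for x in numbers if summaries[str(x)]['type'] == 'issue']

print(numbers)      # [1, 3]
print(only_issues)  # [1]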
Example #48
0
    def load_update_fetch(self, property_name, obj=None, force=False):
        '''Fetch a property for an issue object'''

        # A pygithub issue object has methods such as ...
        #   - get_events()
        #   - get_comments()
        # Those methods return a list with no update() property,
        # so we can't take advantage of the caching scheme used
        # for the issue itself. Instead this function calls
        # those methods by their given name and writes the data
        # to a pickle file along with a timestamp for the fetch time.
        # When the pickle is loaded later, the timestamp is
        # compared to the issue's updated_at timestamp and, if the
        # pickled data is behind, the fetch is repeated.

        edata = None
        events = []
        updated = None
        update = False
        write_cache = False

        pfile = os.path.join(self.full_cachedir, u'%s.pickle' % property_name)
        pdir = os.path.dirname(pfile)
        logging.debug(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            try:
                with open(pfile, 'rb') as f:
                    edata = pickle_load(f)
            except Exception as e:
                update = True
                write_cache = True

        # check the timestamp on the cache
        if edata:
            updated = edata[0]
            events = edata[1]
            if updated < self.instance.updated_at:
                update = True
                write_cache = True

        baseobj = None
        if obj:
            if obj == u'issue':
                baseobj = self.instance
            elif obj == u'pullrequest':
                baseobj = self.pullrequest
        else:
            if hasattr(self.instance, u'get_' + property_name):
                baseobj = self.instance
            else:
                if self.pullrequest:
                    if hasattr(self.pullrequest, u'get_' + property_name):
                        baseobj = self.pullrequest

        if not baseobj:
            logging.error(
                u'%s was not a property for the issue or the pullrequest'
                % property_name
            )
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb; epdb.st()
            else:
                raise Exception(u'property error')

        # pull all events if timestamp is behind or no events cached
        if update or not events or force:
            write_cache = True
            updated = datetime.datetime.utcnow()

            if not hasattr(baseobj, u'get_' + property_name) \
                    and hasattr(baseobj, property_name):
                # plain (non-callable) properties
                try:
                    methodToCall = getattr(baseobj, property_name)
                except Exception as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb; epdb.st()
                    else:
                        raise Exception(to_text(e))
                events = methodToCall
            else:
                # callable properties
                try:
                    methodToCall = getattr(baseobj, u'get_' + property_name)
                except Exception as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb; epdb.st()
                    else:
                        raise Exception(to_text(e))
                events = [x for x in methodToCall()]

        if C.DEFAULT_PICKLE_ISSUES:
            if write_cache or not os.path.isfile(pfile) or force:
                # need to dump the pickle back to disk
                edata = [updated, events]
                with open(pfile, 'wb') as f:
                    pickle_dump(edata, f)

        return events
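The fetch above dispatches dynamically: it prefers a callable get_<name>() accessor and falls back to a plain attribute of the same name. A minimal sketch of that getattr dispatch with a fake object standing in for a pygithub issue:

class FakeIssue:
    labels = ['needs_info']

    def get_comments(self):
        return ['first comment', 'second comment']

def fetch(obj, property_name):
    getter = 'get_' + property_name
    if hasattr(obj, getter):
        # callable property
        return list(getattr(obj, getter)())
    # plain (non-callable) property
    return getattr(obj, property_name)

issue = FakeIssue()
print(fetch(issue, 'comments'))  # ['first comment', 'second comment']
print(fetch(issue, 'labels'))    # ['needs_info']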
Example #49
0
    def get_module_authors(self, module_file):
        """Grep the authors out of the module docstrings"""

        if not os.path.exists(module_file):
            return []

        documentation = b''
        inphase = False

        with io.open(module_file, 'rb') as f:
            for line in f:
                if b'DOCUMENTATION' in line:
                    inphase = True
                    continue
                if line.strip().endswith((b"'''", b'"""')):
                    break
                if inphase:
                    documentation += line

        if not documentation:
            return []

        # clean out any other yaml besides author to save time
        inphase = False
        author_lines = u''
        doc_lines = to_text(documentation).split(u'\n')
        for idx, x in enumerate(doc_lines):
            if x.startswith(u'author'):
                inphase = True
            if inphase and not x.strip().startswith((u'-', u'author')):
                inphase = False
                break
            if inphase:
                author_lines += x + u'\n'

        if not author_lines:
            return []

        ydata = {}
        try:
            ydata = yaml.load(author_lines, BotYAMLLoader)
        except Exception as e:
            print(e)
            return []

        # quit early if the yaml was not valid
        if not ydata:
            return []

        # quit if the key was not found
        if u'author' not in ydata:
            return []

        if not isinstance(ydata[u'author'], list):
            ydata[u'author'] = [ydata[u'author']]

        authors = []
        for author in ydata[u'author']:
            github_ids = self.extract_github_id(author)
            if github_ids:
                authors.extend(github_ids)
        return authors
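The author parsing above ends with extract_github_id, which is not shown here; a simple regex that pulls @handles out of the YAML'd author strings is a reasonable stand-in. A hedged, self-contained sketch (requires PyYAML):

import re
import yaml

author_yaml = (
    "author:\n"
    "  - Jane Doe (@janedoe)\n"
    "  - John Roe (@jroe)\n"
)

ydata = yaml.safe_load(author_yaml)
authors = ydata['author']
if not isinstance(authors, list):
    authors = [authors]

github_ids = []
for author in authors:
    github_ids.extend(re.findall(r'@([A-Za-z0-9-]+)', author))

print(github_ids)  # ['janedoe', 'jroe']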
Example #50
0
    def scrape_pullrequest_review(self, repo_path, number):

        reviews = {'users': {}, 'reviews': {}}

        url = self.baseurl
        url += '/'
        url += repo_path
        url += '/pull/'
        url += to_text(number)

        rr = self._request_url(url)
        soup = BeautifulSoup(rr.text, 'html.parser')

        # <span class="reviewers-status-icon tooltipped tooltipped-nw
        # float-right d-block text-center" aria-label="nerzhul requested
        # changes">
        spans = soup.findAll(
            'span', {'class': lambda L: L and 'reviewers-status-icon' in L})
        for span in spans:
            # nerzhul requested changes
            # bcoca left review comments
            # gundalow approved these changes
            # requested review from gundalow
            txt = span.attrs['aria-label']
            tparts = txt.split(None, 1)
            if tparts[0].lower() != 'awaiting':
                reviews['users'][tparts[0]] = tparts[1]

        # <div class="discussion-item discussion-item-review_requested">
        # <div id="pullrequestreview-15502866" class="timeline-comment
        # js-comment">
        rdivs = soup.findAll(
            'div', {'class': lambda L: L and 'discussion-item-review' in L})

        count = 0
        for rdiv in rdivs:
            count += 1

            author = rdiv.find('a', {'class': ['author']}).text

            id_div = rdiv.find(
                'div',
                {'id': lambda L: L and L.startswith('pullrequestreview-')})
            if id_div:
                rid = id_div.attrs['id']
            else:
                rid = count

            tdiv = rdiv.find('relative-time')
            if tdiv:
                timestamp = tdiv['datetime']
            else:
                timestamp = None

            obutton = rdiv.findAll(
                'button',
                {'class': lambda L: L and 'outdated-comment-label' in L})
            if obutton:
                outdated = True
            else:
                outdated = False

            reviewer = None

            # https://github.com/ansible/ansibullbot/issues/523
            adiv = rdiv.find('div', {
                'class':
                lambda L: L and L.startswith('discussion-item-header')
            })
            if not adiv:
                adiv = rdiv.find('div', {'class': 'discussion-item'})

                if not adiv:

                    adiv = rdiv.find(
                        'h3', {
                            'class':
                            lambda L: L and L.startswith(
                                'discussion-item-header')
                        })

            atxt = adiv.text
            atxt = atxt.lower()
            if 'suggested changes' in atxt:
                action = 'suggested changes'
            elif 'requested changes' in atxt:
                action = 'requested changes'
            elif 'self-requested a review' in atxt:
                # <a href="/resmo" class="author">resmo</a>
                action = 'requested review'
                ra = rdiv.find('a', {'class': 'author'})
                if ra:
                    reviewer = ra.text.strip()
            elif 'requested a review' in atxt:
                action = 'requested review'
                tparts = atxt.split()
                findex = tparts.index('from')
                reviewer = tparts[findex + 1]
            elif 'requested review' in atxt:
                action = 'requested review'
                tparts = atxt.split()
                findex = tparts.index('from')
                reviewer = tparts[findex + 1]
            elif 'approved these changes' in atxt:
                action = 'approved'
            elif 'left review comments' in atxt:
                action = 'review comment'
            elif 'reviewed' in atxt:
                action = 'reviewed'
            elif 'dismissed' in atxt:
                action = 'dismissed'
            elif 'removed ' in atxt:
                action = 'removed'
                tparts = atxt.split()
                if 'from' in tparts:
                    findex = tparts.index('from')
                    reviewer = tparts[findex + 1]
            else:
                raise Exception('parsing error on %s' % atxt)

            reviews['reviews'][rid] = {
                'actor': author,
                'action': action,
                'reviewer': reviewer,
                'timestamp': timestamp,
                'outdated': outdated
            }

        # force to ascii
        x = {}
        for k, v in reviews['users'].items():
            k = k.encode('ascii', 'ignore')
            v = v.encode('ascii', 'ignore')
            x[k] = v
        reviews['users'] = x.copy()

        return reviews
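The scraper above keys everything off the aria-label text of the reviewer status spans. A self-contained sketch of that span parsing, run against a tiny fabricated HTML fragment instead of a live pull request page (requires beautifulsoup4):

from bs4 import BeautifulSoup

html = '''
<span class="reviewers-status-icon" aria-label="nerzhul requested changes"></span>
<span class="reviewers-status-icon" aria-label="gundalow approved these changes"></span>
<span class="reviewers-status-icon" aria-label="awaiting review from bcoca"></span>
'''

soup = BeautifulSoup(html, 'html.parser')
users = {}
spans = soup.findAll('span', {'class': lambda L: L and 'reviewers-status-icon' in L})
for span in spans:
    actor, action = span.attrs['aria-label'].split(None, 1)
    if actor.lower() != 'awaiting':
        users[actor] = action

print(users)
# {'nerzhul': 'requested changes', 'gundalow': 'approved these changes'}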
Example #51
0
    def fuzzy_match(self, repo=None, title=None, component=None):
        '''Fuzzy matching for modules'''

        logging.debug(u'fuzzy match {}'.format(
            to_text(to_bytes(component, 'ascii', 'ignore'), 'ascii')))

        if component.lower() == u'core':
            return None

        # https://github.com/ansible/ansible/issues/18179
        if u'validate-modules' in component:
            return None

        # https://github.com/ansible/ansible/issues/20368
        if u'module_utils' in component:
            return None

        if u'new module' in component:
            return None

        # authorized_keys vs. authorized_key
        if component and component.endswith(u's'):
            tm = self.find_match(component[:-1])
            if tm:
                if not isinstance(tm, list):
                    return tm[u'name']
                elif len(tm) == 1:
                    return tm[0][u'name']
                else:
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb
                        epdb.st()

        match = None
        known_modules = []

        for k, v in six.iteritems(self.modules):
            if v[u'name'] in [u'include']:
                continue
            known_modules.append(v[u'name'])

        title = title.lower()
        title = title.replace(u':', u'')
        title_matches = [x for x in known_modules if x + u' module' in title]

        if not title_matches:
            title_matches = [
                x for x in known_modules if title.startswith(x + u' ')
            ]
            if not title_matches:
                title_matches = \
                    [x for x in known_modules if u' ' + x + u' ' in title]

            if title_matches:
                title_matches = [x for x in title_matches if x != u'at']

        # don't do singular word matching in title for ansible/ansible
        cmatches = None
        if component:
            cmatches = [x for x in known_modules if x in component]
            cmatches = [x for x in cmatches if u'_' + x not in component]

        # globs
        if not cmatches and u'*' in component:
            fmatches = [
                x for x in known_modules if fnmatch.fnmatch(x, component)
            ]
            if fmatches:
                cmatches = fmatches[:]

        if title_matches:
            # use title ... ?
            cmatches = [
                x for x in cmatches if x in title_matches and x not in [u'at']
            ]

        if cmatches:
            if len(cmatches) >= 1 and (u'*' not in component
                                       and u'modules' not in component):
                match = cmatches[0]
            else:
                match = cmatches[:]
            if not match:
                if u'docs.ansible.com' in component:
                    pass
                else:
                    pass
            logging.debug("module - component matches: %s" % cmatches)

        if not match:
            if len(title_matches) == 1:
                match = title_matches[0]
            else:
                logging.debug("module - title matches: %s" % title_matches)

        return match
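A compressed sketch of the matching strategies above - the plural fallback, glob components and the "<name> module" title pattern - against a fabricated module list:

import fnmatch

known_modules = ['authorized_key', 'ec2', 'copy']

def fuzzy(component, title):
    title = title.lower()
    # authorized_keys -> authorized_key
    if component.endswith('s') and component[:-1] in known_modules:
        return component[:-1]
    # glob components such as "ec*"
    globbed = [m for m in known_modules if fnmatch.fnmatch(m, component)]
    if globbed:
        return globbed[0]
    # "<module> module" mentioned in the title
    for m in known_modules:
        if m + ' module' in title:
            return m
    return None

print(fuzzy('authorized_keys', 'authorized_keys: idempotency bug'))  # authorized_key
print(fuzzy('ec*', 'weird behaviour'))                               # ec2
print(fuzzy('something', 'the copy module fails'))                   # copy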
Example #52
0
    def get_module_commits(self):
        keys = self.modules.keys()
        keys = sorted(keys)
        for k in keys:
            self.commits[k] = []
            cpath = os.path.join(self.gitrepo.checkoutdir, k)
            if not os.path.isfile(cpath):
                continue

            mtime = os.path.getmtime(cpath)
            refresh = False
            pfile = os.path.join(self.scraper_cache,
                                 k.replace('/', '_') + '.commits.pickle')

            if not os.path.isfile(pfile):
                refresh = True
            else:
                logging.debug(pfile)
                with open(pfile, 'rb') as f:
                    pdata = pickle.load(f)
                if pdata[0] == mtime:
                    self.commits[k] = pdata[1]
                else:
                    refresh = True

            if refresh:
                logging.info('refresh commit cache for %s' % k)
                cmd = 'cd %s; git log --follow %s' % (self.gitrepo.checkoutdir,
                                                      k)
                (rc, so, se) = run_command(cmd)
                for line in to_text(so).split('\n'):
                    if line.startswith('commit '):
                        commit = {
                            'name': None,
                            'email': None,
                            'login': None,
                            'hash': line.split()[-1],
                            'date': None
                        }

                    # Author: Matt Clay <*****@*****.**>
                    if line.startswith('Author: '):
                        line = line.replace('Author: ', '')
                        line = line.replace('<', '')
                        line = line.replace('>', '')
                        lparts = line.split()

                        if '@' in lparts[-1]:
                            commit['email'] = lparts[-1]
                            commit['name'] = ' '.join(lparts[:-1])
                        else:
                            pass

                        if commit['email'] and \
                                'noreply.github.com' in commit['email']:
                            commit['login'] = commit['email'].split('@')[0]

                    # Date:   Sat Jan 28 23:28:53 2017 -0800
                    if line.startswith('Date:'):
                        dstr = line.split(':', 1)[1].strip()
                        dstr = ' '.join(dstr.split(' ')[:-1])
                        commit['date'] = strip_time_safely(to_text(dstr))
                        self.commits[k].append(commit)

                with open(pfile, 'wb') as f:
                    pickle.dump((mtime, self.commits[k]), f)