Ejemplo n.º 1
0
    def _find_match(self, pattern, exact=False):
        """Find modules matching *pattern*.

        Matching is tried in order of strictness: exact name, exact
        filepath key, any property value, and finally (unless ``exact``)
        a Levenshtein-distance fuzzy match to work around typos.

        Args:
            pattern: module name, filepath key or property value.
            exact (bool): when True, skip the property and fuzzy fallbacks.

        Returns:
            list: matching module dicts (possibly empty).
        """
        logging.debug(u'exact:{} matching on {}'.format(exact, pattern))

        matches = []

        # strip non-ascii chars so comparisons below are stable
        if isinstance(pattern, six.text_type):
            pattern = to_text(to_bytes(pattern, 'ascii', 'ignore'), 'ascii')

        for k, v in six.iteritems(self.modules):
            if v[u'name'] == pattern:
                logging.debug(u'match {} on name: {}'.format(k, v[u'name']))
                matches = [v]
                break

        if not matches:
            # search by key ... aka the filepath
            for k, v in six.iteritems(self.modules):
                if k == pattern:
                    logging.debug(u'match {} on key: {}'.format(k, k))
                    matches = [v]
                    break

        if not matches and not exact:
            # search by properties
            for k, v in six.iteritems(self.modules):
                for subkey in v.keys():
                    if v[subkey] == pattern:
                        logging.debug(u'match {} on subkey: {}'.format(
                            k, subkey))
                        matches.append(v)

        if not matches and not exact:
            # Levenshtein distance should workaround most typos
            distance_map = {}
            for k, v in six.iteritems(self.modules):
                mname = v.get(u'name')
                if not mname:
                    continue
                if isinstance(mname, six.text_type):
                    mname = to_text(to_bytes(mname, 'ascii', 'ignore'),
                                    'ascii')
                try:
                    distance = Levenshtein.distance(pattern, mname)
                except TypeError as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb
                        epdb.st()
                    # BUG FIX: skip un-scorable names instead of recording
                    # a stale/undefined distance value
                    continue
                distance_map[mname] = (distance, k)
            if distance_map:
                # pick the closest name; only accept it for patterns longer
                # than 3 chars with an edit distance below 3.
                # BUG FIX: the original compared the threshold against the
                # [distance, key] list itself and indexed self.modules with
                # that list, which could never work.
                best_name, (best_distance, best_key) = min(
                    distance_map.items(), key=lambda x: x[1][0])
                if len(pattern) > 3 > best_distance:
                    logging.debug(
                        u'levenshtein ratio match: ({}) {} {}'.format(
                            best_key, best_name, pattern))
                    matches = [self.modules[best_key]]

        return matches
Ejemplo n.º 2
0
    def _find_match(self, pattern, exact=False):
        """Locate module dicts matching *pattern*.

        Tries exact name, then filepath key, then any property value,
        then (unless *exact*) a Levenshtein fuzzy fallback.

        Returns a (possibly empty) list of module dicts.
        """
        logging.debug(u'exact:{} matching on {}'.format(exact, pattern))

        matches = []

        if isinstance(pattern, six.text_type):
            pattern = to_text(to_bytes(pattern,'ascii', 'ignore'), 'ascii')

        for k, v in six.iteritems(self.modules):
            if v[u'name'] == pattern:
                logging.debug(u'match {} on name: {}'.format(k, v[u'name']))
                matches = [v]
                break

        if not matches:
            # search by key ... aka the filepath
            for k, v in six.iteritems(self.modules):
                if k == pattern:
                    logging.debug(u'match {} on key: {}'.format(k, k))
                    matches = [v]
                    break

        if not matches and not exact:
            # search by properties
            for k, v in six.iteritems(self.modules):
                for subkey in v.keys():
                    if v[subkey] == pattern:
                        logging.debug(u'match {} on subkey: {}'.format(k, subkey))
                        matches.append(v)

        if not matches and not exact:
            # Levenshtein distance should workaround most typos
            best_distance = None
            best_key = None
            best_name = None
            for k, v in six.iteritems(self.modules):
                mname = v.get(u'name')
                if not mname:
                    continue
                if isinstance(mname, six.text_type):
                    mname = to_text(to_bytes(mname, 'ascii', 'ignore'), 'ascii')
                try:
                    distance = Levenshtein.distance(pattern, mname)
                except TypeError as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb; epdb.st()
                    # BUG FIX: the original fell through and stored a
                    # possibly-undefined distance; skip this name instead
                    continue
                if best_distance is None or distance < best_distance:
                    best_distance = distance
                    best_key = k
                    best_name = mname
            # BUG FIX: the original compared 3 against the [distance, key]
            # list and used that list as a dict key; compare/track the
            # integer distance and the module key directly.
            if best_distance is not None and len(pattern) > 3 > best_distance:
                logging.debug(u'levenshtein ratio match: ({}) {} {}'.format(best_key, best_name, pattern))
                matches = [self.modules[best_key]]

        return matches
Ejemplo n.º 3
0
    def get_summary(self, repo_url, otype, number):
        """Fetch the GraphQL summary node for one issue or pull request.

        Args:
            repo_url  (str): repository URL ("owner/repo")
            otype     (str): issue or pullRequest
            number    (str): identifies the pull-request or issue, e.g. 12345

        Returns:
            dict or None: the updated node, or None when it does not exist.
        """
        parts = repo_url.split(u'/', 1)
        owner = parts[0]
        repo = parts[1]

        template = self.environment.from_string(QUERY_TEMPLATE_SINGLE_NODE)
        query = template.render(
            OWNER=owner,
            REPO=repo,
            OBJECT_TYPE=otype,
            OBJECT_PARAMS='number: %s' % number,
            FIELDS=QUERY_FIELDS,
        )

        payload = {
            u'query': to_bytes(query, 'ascii', 'ignore').strip(),
            u'variables': u'{}',
            u'operationName': None,
        }
        # json/requests want text rather than bytes on python3
        if six.PY3:
            payload[u'query'] = to_text(payload[u'query'], 'ascii')

        response = requests.post(
            self.baseurl, headers=self.headers, data=json.dumps(payload))
        data = response.json()

        node = data[u'data'][u'repository'][otype]
        if node is None:
            return

        self.update_node(node, otype, owner, repo)

        return node
Ejemplo n.º 4
0
    def get_cached_request(self, url):
        '''Use a combination of sqlite and ondisk caching to GET an api resource'''

        url_parts = url.split('/')

        # the on-disk cache path mirrors the url under cached_requests_dir
        cdf = os.path.join(self.cached_requests_dir,
                           url.replace('https://', '') + '.json.gz')
        cdd = os.path.dirname(cdf)
        if not os.path.exists(cdd):
            os.makedirs(cdd)

        # FIXME - commits are static and can always be used from cache.
        if url_parts[-2] == 'commits' and os.path.exists(cdf):
            with gzip.open(cdf, 'r') as f:
                data = json.loads(f.read())
            return data

        headers = {
            u'Accept': u','.join(self.accepts_headers),
            u'Authorization': u'Bearer %s' % self.token,
        }

        meta = ADB.get_github_api_request_meta(url, token=self.token)
        if meta is None:
            meta = {}

        # https://developer.github.com/v3/#conditional-requests
        etag = meta.get('etag')
        if etag and os.path.exists(cdf):
            headers['If-None-Match'] = etag

        rr = requests.get(url, headers=headers)

        if rr.status_code == 304:
            # not modified; reuse the cached copy.
            # BUG FIX: the cache file is written with gzip.open() below, so
            # it must be read back with gzip.open() too -- a plain open()
            # handed compressed bytes to json.loads().
            with gzip.open(cdf, 'r') as f:
                data = json.loads(f.read())
        else:
            data = rr.json()

            # handle ratelimits ...
            if isinstance(data, dict) and data.get(u'message'):
                if data[u'message'].lower().startswith(
                        u'api rate limit exceeded'):
                    raise RateLimitError()

            # cache data to disk
            logging.debug('write %s' % cdf)
            with gzip.open(cdf, 'w') as f:
                f.write(to_bytes(json.dumps(data)))

        # save the meta
        ADB.set_github_api_request_meta(url, rr.headers, cdf, token=self.token)

        # pagination
        # NOTE(review): this follows pages via self.get_request, not
        # get_cached_request -- presumably intentional so only the first
        # page is conditional; confirm against the rest of the class.
        if hasattr(rr, u'links') and rr.links and rr.links.get(u'next'):
            _data = self.get_request(rr.links[u'next'][u'url'])
            data += _data

        return data
Ejemplo n.º 5
0
    def get_summary(self, repo_url, otype, number):
        """Collect the summary data for a single issue or pull request id.

        Args:
            repo_url  (str): repository URL ("owner/repo")
            otype     (str): issue or pullRequest
            number    (str): pull-request or issue number, for example: 12345
        """
        owner_repo = repo_url.split(u'/', 1)
        owner = owner_repo[0]
        repo = owner_repo[1]

        template = self.environment.from_string(QUERY_TEMPLATE_SINGLE_NODE)
        object_params = 'number: %s' % number
        query = template.render(OWNER=owner, REPO=repo, OBJECT_TYPE=otype,
                                OBJECT_PARAMS=object_params,
                                FIELDS=QUERY_FIELDS)

        query_body = to_bytes(query, 'ascii', 'ignore').strip()
        payload = {
            u'query': query_body,
            u'variables': u'{}',
            u'operationName': None,
        }
        if six.PY3:
            # the json module needs text, not bytes, on python3
            payload[u'query'] = to_text(payload[u'query'], 'ascii')

        rr = requests.post(self.baseurl,
                           headers=self.headers,
                           data=json.dumps(payload))
        data = rr.json()

        node = data[u'data'][u'repository'][otype]
        if node is None:
            return

        self.update_node(node, otype, owner, repo)

        return node
Ejemplo n.º 6
0
    def get_usernames_from_filename_blame(self, owner, repo, branch, filepath):
        """Map github logins to the commit oids they authored for one file.

        Returns:
            tuple: (committers, emailmap) -- committers maps login ->
            list of commit oids; emailmap maps author email -> login
            (first login seen for an email wins).
        """
        template = self.environment.from_string(QUERY_TEMPLATE_BLAME)
        committers = defaultdict(set)
        emailmap = {}

        query = template.render(OWNER=owner,
                                REPO=repo,
                                BRANCH=branch,
                                PATH=filepath)

        rendered = to_text(
            to_bytes(query, 'ascii', 'ignore'),
            'ascii',
        ).strip()
        payload = {
            u'query': rendered,
            u'variables': u'{}',
            u'operationName': None,
        }
        response = self.requests(payload)
        data = response.json()

        repository = data[u'data'][u'repository']
        nodes = repository[u'ref'][u'target'][u'blame'][u'ranges']
        # each range looks like:
        #   {'commit': {'oid': ...,
        #               'author': {'email': ..., 'user': {'login': ...}}}}
        for entry in nodes:
            commit = entry[u'commit']
            author = commit[u'author']
            if not author[u'user']:
                # commit is not linked to a github account
                continue
            github_id = author[u'user'][u'login']
            committers[github_id].add(commit[u'oid'])
            # emails come from 'git log --follow' but all github id aren't fetch:
            # - GraphQL/git 'blame' don't list all commits
            # - GraphQL 'history' neither because 'history' is like 'git log' but without '--follow'
            email = author.get(u'email')
            if email and email not in emailmap:
                emailmap[email] = github_id

        # callers expect plain lists, not sets
        for github_id, commits in committers.items():
            committers[github_id] = list(commits)
        return committers, emailmap
Ejemplo n.º 7
0
    def get_usernames_from_filename_blame(self, owner, repo, branch, filepath):
        """Collect committer logins and an email->login map from git blame."""
        template = self.environment.from_string(QUERY_TEMPLATE_BLAME)
        committers = defaultdict(set)
        emailmap = {}

        query = template.render(OWNER=owner, REPO=repo, BRANCH=branch,
                                PATH=filepath)

        payload = {
            u'query': to_text(to_bytes(query, 'ascii', 'ignore'),
                              'ascii').strip(),
            u'variables': u'{}',
            u'operationName': None,
        }
        data = self.requests(payload).json()

        target = data[u'data'][u'repository'][u'ref'][u'target']
        # blame ranges have this shape:
        #   {'commit': {'oid': ...,
        #               'author': {'email': ..., 'user': {'login': ...}}}}
        for blame_range in target[u'blame'][u'ranges']:
            commit = blame_range[u'commit']
            user = commit[u'author'][u'user']
            if not user:
                continue
            login = user[u'login']
            committers[login].add(commit[u'oid'])
            # emails come from 'git log --follow' but all github id aren't fetch:
            # - GraphQL/git 'blame' don't list all commits
            # - GraphQL 'history' neither because 'history' is like 'git log' but without '--follow'
            email = commit[u'author'].get(u'email')
            if email and email not in emailmap:
                emailmap[email] = login

        # convert the oid sets into lists in place for the callers
        for login in list(committers):
            committers[login] = list(committers[login])
        return committers, emailmap
Ejemplo n.º 8
0
    def get_test_results(self):
        """Gather test results from the artifacts of failed CI jobs.

        Returns:
            tuple: (results, ci_verified) -- results is a list of dicts
            describing each artifact's parsed results; ci_verified is True
            only when every failed job produced an artifact and every
            artifact was marked verified.
        """
        # nothing to report while the build has not finished
        if self.state in ('pending', 'inProgress', None):
            return [], False

        failed = [job for job in self.jobs if job['result'] == 'failed']
        if not failed:
            return [], False

        results = []
        ci_verified = True
        artifact_hits = 0
        for job in failed:
            for artifact in self.artifacts:
                if artifact['source'] != job['id']:
                    continue
                artifact_hits += 1
                download_url = artifact['resource']['downloadUrl']
                for artifact_json in self.get_artifact(artifact['name'],
                                                       download_url):
                    if not artifact_json['verified']:
                        ci_verified = False

                    result_data = ''.join(
                        entry['message'] + entry['output']
                        for entry in artifact_json['results'])

                    results.append({
                        'contents': {
                            'results': artifact_json['results'],
                        },
                        'run_id': self.build_id,
                        'job_id':
                            hashlib.md5(to_bytes(result_data)).hexdigest(),
                        'path': None,
                    })

        # a failed job with no artifact means the run cannot be trusted
        if ci_verified and len(failed) != artifact_hits:
            ci_verified = False

        return results, ci_verified
Ejemplo n.º 9
0
 def _write_cache_file(self, cfile, data):
     """Serialize *data* to JSON and write it gzip-compressed to *cfile*."""
     payload = to_bytes(json.dumps(data))
     with gzip.open(cfile, 'w') as handle:
         handle.write(payload)
Ejemplo n.º 10
0
def extract_template_data(body, issue_class='issue', sections=None):
    """Parse an issue/PR body into a dict of lowercased template sections.

    Args:
        body (str): raw issue or pull-request body text.
        issue_class (str): 'issue' or 'pullrequest'; controls how the
            'issue type' section value is normalized.
        sections: optional iterable of section headers; defaults to SECTIONS.

    Returns:
        dict: section name -> cleaned value, plus 'component_raw' holding
        the lightly-cleaned original component text. Empty when no
        sections could be found.
    """
    if sections is None:
        sections = SECTIONS

    # pointless to parse a null body
    if not body:
        return {}

    # simple find or fuzzy find the sections within the body
    tdict = find_sections(body) or fuzzy_find_sections(body, sections)
    if not tdict:
        return {}

    # lowercase the keys
    ndict = {}
    for k, v in tdict.items():
        ku = k.lower()
        if ku == 'plugin name':
            ku = 'component name'
        ndict[ku] = v
    if ndict != tdict:
        tdict = ndict.copy()

    # make a raw component section for later processing
    component_raw = tdict.get('component name', '')

    # https://github.com/ansible/ansibullbot/issues/359
    if ',' in tdict.get('component name', ''):
        tdict['component name'] = tdict['component name'].replace(',', '\n')

    # https://github.com/ansible/ansibullbot/issues/385
    if ' and ' in tdict.get('component name', ''):
        tdict['component name'] = tdict['component name'].replace(
            ' and ', '\n')

    # cleanup the sections
    for k, v in tdict.items():
        # remove markdown comments from the sections
        v = remove_markdown_comments(v)

        # remove non-ascii chars
        v = to_text(to_bytes(v, 'ascii', errors='ignore'), 'ascii')

        # normalize newlines and return chars
        v = v.replace('\r', '\n')

        # remove pre-ceding and trailing newlines
        v = v.strip()

        # remove trailing hashes
        while v.endswith('#'):
            v = v[:-1]

        # remove pre-ceding and trailing newlines (AGAIN)
        v = v.strip()

        # clean more on critical sections
        if 'step' not in k and 'result' not in k:

            # https://github.com/ansible/ansible-modules-extras/issues/2262
            if k == 'component name':
                v = v.lower()

            if k == 'component name' and 'module' in v:
                if '/modules/' in v or \
                        'module_util' in v or \
                        'module_utils/' in v or \
                        'validate-modules' in v or\
                        'module_common' in v:
                    # https://github.com/ansible/ansible/issues/20563
                    # https://github.com/ansible/ansible/issues/18179
                    pass
                else:
                    # some modules have the word "_module" in their name
                    # https://github.com/ansible/ansibullbot/issues/198
                    # https://github.com/ansible/ansible-modules-core/issues/4159
                    # https://github.com/ansible/ansible-modules-core/issues/5328
                    reg = re.compile(r'\S+_module')
                    match = reg.match(v)
                    if match:
                        v = v[match.pos:match.end()]
                    else:
                        # https://github.com/ansible/ansibullbot/issues/385
                        if 'modules' in v:
                            v = v.replace('modules', ' ')
                        else:
                            v = v.replace('module', ' ')

            # remove useless chars
            exclude = None
            if k == 'component name':
                exclude = ['__']
            v = clean_bad_characters(v, exclude=exclude)

            # clean up empty lines
            vlines = v.split('\n')
            vlines = [x for x in vlines if x.strip()]
            vlines = [x.strip() for x in vlines if x.strip()]
            v = '\n'.join(vlines)

            # remove pre-ceding special chars
            for bc in ['-', '*']:
                if v:
                    if v[0] == bc:
                        v = v[1:]
                    v = v.strip()

            # keep just the first line for types and components
            if k in ['issue type', 'component name']:
                if v:
                    vlines = v.split('\n')
                    # https://github.com/ansible/ansible-modules-core/issues/3085
                    vlines = [x for x in vlines if 'pick one' not in x]
                    v = vlines[0]

            # https://github.com/ansible/ansible-modules-core/issues/4060
            if k in ['issue type']:
                if '/' in v:
                    # BUG FIX: the original tested ``k == ['issue type']``
                    # (a string against a list), which is always False and
                    # therefore kept the LAST path element; keep the first
                    # element as intended.
                    v = v.split('/')[0]
                    v = v.strip()

            if issue_class == 'issue':
                if k == 'issue type' and v != 'bug report' and 'bug' in v.lower():
                    v = 'bug report'
                elif k == 'issue type' and v != 'feature idea' and 'feature' in v.lower():
                    v = 'feature idea'
            elif issue_class == 'pullrequest':
                if k == 'issue type' and v != 'bugfix pull request' and 'bug' in v.lower():
                    v = 'bugfix pull request'
                elif k == 'issue type' and v != 'feature pull request' and 'feature' in v.lower():
                    v = 'feature pull request'
                elif k == 'issue type' and v != 'new module pull request' and 'new module' in v.lower():
                    v = 'new module pull request'
                elif k == 'issue type' and v != 'docs pull request' and 'docs' in v.lower():
                    v = 'docs pull request'
                elif k == 'issue type' and v != 'test pull request' and 'test' in v.lower():
                    v = 'test pull request'

        if v == 'paste below':
            v = ''

        # save
        tdict[k] = v

    # quick clean and add raw component to the dict
    component_raw = remove_markdown_comments(component_raw)
    component_raw = clean_bad_characters(component_raw, exclude=['__'])
    component_raw = '\n'.join(
        [x.strip() for x in component_raw.split('\n') if x.strip()])
    component_raw = '\n'.join(
        [x for x in component_raw.split('\n') if not x.startswith('#')])
    tdict['component_raw'] = component_raw

    return tdict
Ejemplo n.º 11
0
def extract_template_data(body,
                          issue_number=None,
                          issue_class='issue',
                          sections=SECTIONS):
    """Parse an issue/PR body into a dict of lowercased template sections.

    Locates section headers by scanning the uppercased body, infers the
    most common header decoration, slices the body into sections, then
    normalizes each section's text.

    Args:
        body (str): raw issue or pull-request body text.
        issue_number: unused here; kept for interface compatibility.
        issue_class (str): 'issue' or 'pullrequest'; controls how the
            'issue type' value is normalized.
        sections: iterable of uppercase section headers to look for.

    Returns:
        dict: section name -> cleaned value, plus 'component_raw'.
        Empty when no sections could be found.
    """
    # this is the final result to return
    tdict = {}

    if not body:
        return tdict

    upper_body = body.upper()

    # make a map of locations where each section starts
    match_map = {}
    for section in sections:
        # http://www.tutorialspoint.com/python/string_find.htm
        # str.find(str, beg=0 end=len(string))
        match = upper_body.find(section)
        if match != -1:
            match_map[section] = match

    if not match_map:
        return {}

    # what are the header(s) being used?
    # a "header" is the characters immediately before and after a section
    # title, e.g. '#${section}#' or '\n${section}:'
    headers = []
    for k, v in match_map.items():
        try:
            before = upper_body[v - 1]
            after = upper_body[v + len(k)]
            header = before + u'${section}' + after
            headers.append(header)
        except Exception as e:
            # section at the very start/end of the body has no surrounding
            # chars; just skip it
            pass

    # pick the most common header and re-search with it
    if len(sorted(set(headers))) > 1:
        choices = sorted(set(headers))
        choice_totals = []
        for choice in choices:
            ctotal = len([x for x in headers if x == choice])
            choice_totals.append((ctotal, choice))
        choice_totals.sort(key=lambda tup: tup[0])
        sheader = choice_totals[-1][1]

        match_map = {}
        t = Template(sheader)
        for section in SECTIONS:
            try:
                tofind = t.substitute(section=section)
            except Exception as e:
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb
                    epdb.st()
                else:
                    raise Exception(u'substitution failed: %s' % to_text(e))
            match = upper_body.find(tofind)
            if match != -1:
                # +1 skips the leading header character
                match_map[section] = match + 1

        # re-do for missing sections with less common header(s)
        for section in SECTIONS:
            if section in match_map:
                continue
            for choice in choices:
                t = Template(choice)
                tofind = t.substitute(section=section)
                match = upper_body.find(tofind)
                if match != -1:
                    match_map[section] = match + 1
                    break

        if not match_map:
            return {}

    elif len(headers) <= 1:
        if headers and \
                (u'#' not in headers[0] and
                 u':' not in headers[0] and
                 u'*' not in headers[0]):
            return {}
        else:
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb
                epdb.st()

    # sort mapping by element id
    match_map = sorted(match_map.items(), key=operator.itemgetter(1))

    # assume an implicit ISSUE TYPE preamble when the first real section
    # starts deep into the body
    if match_map and u'ISSUE TYPE' not in [x[0] for x in match_map]:
        if match_map[0][1] > 10:
            match_map.insert(0, (u'ISSUE TYPE', 0))

    # extract the sections based on their indexes
    total_indexes = len(match_map) - 1
    for idx, x in enumerate(match_map):

        if x[1] > 0:
            start_index = x[1] + (len(x[0]))
        else:
            start_index = 0

        # if last index, slice to the end
        if idx >= total_indexes:
            tdict[x[0]] = body[start_index:]
        else:
            # slice to the next section
            stop_index = match_map[idx + 1][1]
            tdict[x[0]] = body[start_index:stop_index]

    # lowercase the keys
    ndict = {}
    for k, v in six.iteritems(tdict):
        ku = k.lower()
        if ku == u'plugin name':
            ku = u'component name'
        ndict[ku] = v
    if ndict != tdict:
        tdict = ndict.copy()

    # make a raw component section for later processing
    component_raw = tdict.get(u'component name', u'')

    # https://github.com/ansible/ansibullbot/issues/359
    if u',' in tdict.get(u'component name', u''):
        tdict[u'component name'] = tdict[u'component name'].replace(
            u',', u'\n')

    # https://github.com/ansible/ansibullbot/issues/385
    if u' and ' in tdict.get(u'component name', u''):
        tdict[u'component name'] = tdict[u'component name'].replace(
            u' and ', u'\n')

    # cleanup the sections
    for k, v in six.iteritems(tdict):
        # remove markdown comments from the sections
        v = remove_markdown_comments(v)

        # remove non-ascii chars
        v = to_text(to_bytes(v, 'ascii', errors='ignore'), 'ascii')

        # normalize newlines and return chars
        v = v.replace(u'\r', u'\n')

        # remove pre-ceding and trailing newlines
        v = v.strip()

        # remove trailing hashes
        while v.endswith(u'#'):
            v = v[:-1]

        # remove pre-ceding and trailing newlines (AGAIN)
        v = v.strip()

        # clean more on critical sections
        if u'step' not in k and u'result' not in k:

            # https://github.com/ansible/ansible-modules-extras/issues/2262
            if k == u'component name':
                v = v.lower()

            if k == u'component name' and u'module' in v:
                if u'/modules/' in v or \
                        u'module_util' in v or \
                        u'module_utils/' in v or \
                        u'validate-modules' in v or\
                        u'module_common' in v:
                    # https://github.com/ansible/ansible/issues/20563
                    # https://github.com/ansible/ansible/issues/18179
                    pass
                else:
                    # some modules have the word "_module" in their name
                    # https://github.com/ansible/ansibullbot/issues/198
                    # https://github.com/ansible/ansible-modules-core/issues/4159
                    # https://github.com/ansible/ansible-modules-core/issues/5328
                    # NOTE: raw string avoids the invalid '\S' escape warning
                    reg = re.compile(r'\S+_module')
                    match = reg.match(v)
                    if match:
                        v = v[match.pos:match.end()]
                    else:
                        # https://github.com/ansible/ansibullbot/issues/385
                        if u'modules' in v:
                            v = v.replace(u'modules', u' ')
                        else:
                            v = v.replace(u'module', u' ')

            # remove useless chars
            v = clean_bad_characters(v)

            # clean up empty lines
            vlines = v.split(u'\n')
            vlines = [x for x in vlines if x.strip()]
            vlines = [x.strip() for x in vlines if x.strip()]
            v = u'\n'.join(vlines)

            # remove pre-ceding special chars
            for bc in [u'-', u'*']:
                if v:
                    if v[0] == bc:
                        v = v[1:]
                    v = v.strip()

            # keep just the first line for types and components
            if k in [u'issue type', u'component name']:
                if v:
                    vlines = v.split(u'\n')
                    # https://github.com/ansible/ansible-modules-core/issues/3085
                    vlines = [x for x in vlines if u'pick one' not in x]
                    v = vlines[0]

            # https://github.com/ansible/ansible-modules-core/issues/4060
            if k in [u'issue type']:
                if u'/' in v:
                    # BUG FIX: the original tested ``k == [u'issue type']``
                    # (a string against a list), which is always False and
                    # therefore kept the LAST path element; keep the first
                    # element as intended.
                    v = v.split(u'/')[0]
                    v = v.strip()

            if issue_class == u'issue':
                if k == u'issue type' and v != u'bug report' and u'bug' in v.lower():
                    v = u'bug report'
                elif k == u'issue type' and v != u'feature idea' and u'feature' in v.lower():
                    v = u'feature idea'
            elif issue_class == u'pullrequest':
                if k == u'issue type' and v != u'bugfix pull request' and u'bug' in v.lower():
                    v = u'bugfix pull request'
                elif k == u'issue type' and v != u'feature pull request' and u'feature' in v.lower():
                    v = u'feature pull request'
                elif k == u'issue type' and v != u'new module pull request' and u'new module' in v.lower():
                    v = u'new module pull request'
                elif k == u'issue type' and v != u'docs pull request' and u'docs' in v.lower():
                    v = u'docs pull request'
                elif k == u'issue type' and v != u'test pull request' and u'test' in v.lower():
                    v = u'test pull request'

        # save
        tdict[k] = v

    # quick clean and add raw component to the dict
    component_raw = remove_markdown_comments(component_raw)
    component_raw = clean_bad_characters(component_raw, exclude=None)
    component_raw = u'\n'.join(
        [x.strip() for x in component_raw.split(u'\n') if x.strip()])
    component_raw = u'\n'.join(
        [x for x in component_raw.split(u'\n') if not x.startswith(u'#')])
    tdict[u'component_raw'] = component_raw

    return tdict
def main():
    """Build cross-reference reports for open ansible/ansible issues.

    Reads receiver summaries and per-issue metadata, then writes
    json/txt/html reports (by issue, by file, by maintainer) into the
    destination directory (argv[1] or /tmp).
    """
    initialize_sentry()

    # define where to dump the resulting files
    if len(sys.argv) > 1:
        destdir = sys.argv[1]
    else:
        destdir = '/tmp'

    if not os.path.isdir(destdir):
        os.makedirs(destdir)

    # issue url -> full metadata dict
    ISSUES = {}
    # filename -> list of issue urls touching that file
    BYFILE = {}
    # issue url -> list of matched filenames
    BYISSUE = {}
    # maintainer login -> list of issue urls they cover
    BYMAINTAINER = {}

    summaries = get_receiver_summaries('ansible', 'ansible', state='open')
    for summary in summaries:
        number = summary['github_number']
        this_meta = get_receiver_metadata('ansible', 'ansible', number=number)

        if not this_meta:
            continue

        this_meta = this_meta[0]
        url = this_meta['html_url']
        ISSUES[url] = this_meta
        BYISSUE[url] = []

        # NOTE(review): this probe appears to guard against malformed
        # metadata objects; the .get() result is deliberately discarded
        try:
            this_meta.get('component_matches', [])
        except Exception as e:
            print(e)
            continue

        for component in this_meta.get('component_matches', []):
            # we seem to have some variation in the keys ...
            filename = None
            try:
                filename = component['repo_filename']
            except KeyError:
                filename = component['filename']

            if not filename:
                continue

            if 'maintainers' in component:
                for maintainer in component['maintainers']:
                    if maintainer not in BYMAINTAINER:
                        BYMAINTAINER[maintainer] = []
                    if url not in BYMAINTAINER[maintainer]:
                        BYMAINTAINER[maintainer].append(url)

            BYISSUE[url].append(filename)

            if filename not in BYFILE:
                BYFILE[filename] = []
            if url not in BYFILE[filename]:
                BYFILE[filename].append(url)

    destfile = os.path.join(destdir, 'byissue.json')
    with open(destfile, 'w') as f:
        f.write(json.dumps(BYISSUE, indent=2, sort_keys=True))

    destfile = os.path.join(destdir, 'byfile.json')
    with open(destfile, 'w') as f:
        f.write(json.dumps(BYFILE, indent=2, sort_keys=True))

    # flatten to [filename, url, url, ...] rows and sort so the files
    # with the most issues come first
    tuples = list(BYFILE.items())
    for idx, x in enumerate(tuples):
        x = [x[0]] + x[1]
        tuples[idx] = x
    tuples.sort(key=len)
    tuples.reverse()

    destfile = os.path.join(destdir, 'byfile_sorted.txt')
    with open(destfile, 'wb') as f:
        for tup in tuples:
            f.write(b'%s\n' % to_bytes(tup[0]))
            for issue in tup[1:]:
                title = to_bytes(ISSUES[issue]['title'])
                f.write(b'\t%s\t%s\n' % (to_bytes(issue), title))

    # same report rendered as a simple static html page
    destfile = os.path.join(destdir, 'byfile_sorted.html')
    with open(destfile, 'wb') as f:
        for idp, tup in enumerate(tuples):
            f.write(
                b'<div style="background-color: #cfc ; padding: 10px; border: 1px solid green;">\n'
            )
            file_ref = b'%d. <a href="https://github.com/ansible/ansible/blob/devel/%s">https://github.com/ansible/ansible/blob/devel/%s</a> %d total' % (
                (idp + 1), to_bytes(tup[0]), to_bytes(tup[0]), len(tup[1:]))
            f.write(b'%s\n' % (file_ref))
            f.write(b'</div>')
            f.write(b'<br>\n')
            for issue in tup[1:]:
                title = to_bytes(ISSUES[issue]['title'])
                issue = to_bytes(issue)
                issue_ref = b'<a href="%s">%s</a>' % (issue, issue)
                f.write(b'\t%s\t%s<br>\n' % (issue_ref, title))
            f.write(b'<br>\n')

    # flatten and sort maintainers by how many issues they cover
    tuples = list(BYMAINTAINER.items())
    for idx, x in enumerate(tuples):
        x = [x[0]] + x[1]
        tuples[idx] = x
    tuples.sort(key=len)
    tuples.reverse()

    destfile = os.path.join(destdir, 'bymaintainer.json')
    with open(destfile, 'w') as f:
        f.write(json.dumps(BYMAINTAINER, indent=2, sort_keys=True))

    destfile = os.path.join(destdir, 'bymaintainer_sorted.txt')
    with open(destfile, 'wb') as f:
        for tup in tuples:
            f.write(b'%s\n' % to_bytes(tup[0]))
            for issue in tup[1:]:
                f.write(b'\t%s\n' % to_bytes(issue))
Ejemplo n.º 13
0
def _flatten_and_sort(mapping):
    """Flatten ``{key: [values, ...]}`` into lists of ``[key, value, ...]``.

    The flattened lists are ordered longest-first so the keys with the most
    associated issues come out at the top of the report.
    """
    flattened = [[key] + values for key, values in mapping.items()]
    flattened.sort(key=len)
    flattened.reverse()
    return flattened


def main():
    """Collect open-issue metadata and dump per-issue / per-file /
    per-maintainer reports (json, txt and html) into a destination dir."""
    initialize_sentry()

    # define where to dump the resulting files
    if len(sys.argv) > 1:
        destdir = sys.argv[1]
    else:
        destdir = '/tmp'

    if not os.path.isdir(destdir):
        os.makedirs(destdir)

    ISSUES = {}        # issue url -> full receiver metadata
    BYFILE = {}        # repo filename -> [issue urls]
    BYISSUE = {}       # issue url -> [repo filenames]
    BYMAINTAINER = {}  # maintainer login -> [issue urls]

    summaries = get_receiver_summaries('ansible', 'ansible', state='open')
    for summary in summaries:
        number = summary['github_number']
        this_meta = get_receiver_metadata('ansible', 'ansible', number=number)

        if not this_meta:
            continue

        this_meta = this_meta[0]
        url = this_meta['html_url']
        ISSUES[url] = this_meta
        BYISSUE[url] = []

        # guard against malformed metadata payloads
        try:
            this_meta.get('component_matches', [])
        except Exception as e:
            print(e)
            continue

        for component in this_meta.get('component_matches', []):
            # we seem to have some variation in the keys ...
            try:
                filename = component['repo_filename']
            except KeyError:
                filename = component['filename']

            if not filename:
                continue

            if 'maintainers' in component:
                for maintainer in component['maintainers']:
                    BYMAINTAINER.setdefault(maintainer, [])
                    if url not in BYMAINTAINER[maintainer]:
                        BYMAINTAINER[maintainer].append(url)

            BYISSUE[url].append(filename)

            BYFILE.setdefault(filename, [])
            if url not in BYFILE[filename]:
                BYFILE[filename].append(url)

    destfile = os.path.join(destdir, 'byissue.json')
    with open(destfile, 'w') as f:
        f.write(json.dumps(BYISSUE, indent=2, sort_keys=True))

    destfile = os.path.join(destdir, 'byfile.json')
    with open(destfile, 'w') as f:
        f.write(json.dumps(BYFILE, indent=2, sort_keys=True))

    # BUGFIX: dict.items() returns a view in python3 which supports neither
    # index assignment nor .sort(); listify/flatten first.
    tuples = _flatten_and_sort(BYFILE)

    destfile = os.path.join(destdir, 'byfile_sorted.txt')
    with open(destfile, 'wb') as f:
        for tup in tuples:
            f.write(b'%s\n' % to_bytes(tup[0]))
            for issue in tup[1:]:
                # BUGFIX: ISSUES is keyed by the text url, so the lookup must
                # happen BEFORE converting the url to bytes.
                title = to_bytes(ISSUES[issue]['title'])
                f.write(b'\t%s\t%s\n' % (to_bytes(issue), title))

    destfile = os.path.join(destdir, 'byfile_sorted.html')
    with open(destfile, 'wb') as f:
        for idp, tup in enumerate(tuples):
            f.write(
                b'<div style="background-color: #cfc ; padding: 10px; border: 1px solid green;">\n'
            )
            # BUGFIX: use %d for the integer fields -- bytes %s raises
            # TypeError for ints in python3 (PEP 461).
            file_ref = b'%d. <a href="https://github.com/ansible/ansible/blob/devel/%s">https://github.com/ansible/ansible/blob/devel/%s</a> %d total' % (
                (idp + 1), to_bytes(tup[0]), to_bytes(tup[0]), len(tup[1:]))
            f.write(b'%s\n' % (file_ref))
            f.write(b'</div>')
            f.write(b'<br>\n')
            for issue in tup[1:]:
                # look up the title with the text key, then convert for output
                title = to_bytes(ISSUES[issue]['title'])
                issue = to_bytes(issue)
                issue_ref = b'<a href="%s">%s</a>' % (issue, issue)
                f.write(b'\t%s\t%s<br>\n' % (issue_ref, title))
            f.write(b'<br>\n')

    # BUGFIX: same dict-view problem as BYFILE above.
    tuples = _flatten_and_sort(BYMAINTAINER)

    destfile = os.path.join(destdir, 'bymaintainer.json')
    with open(destfile, 'w') as f:
        f.write(json.dumps(BYMAINTAINER, indent=2, sort_keys=True))

    destfile = os.path.join(destdir, 'bymaintainer_sorted.txt')
    with open(destfile, 'wb') as f:
        for tup in tuples:
            f.write(b'%s\n' % to_bytes(tup[0]))
            for issue in tup[1:]:
                f.write(b'\t%s\n' % to_bytes(issue))
Ejemplo n.º 14
0
    def fuzzy_match(self, repo=None, title=None, component=None):
        '''Fuzzy matching for modules

        Tries to resolve the free-form ``component`` string (with help from
        the issue ``title``) to one or more known module names from
        ``self.modules``.  Returns a single name, a list of candidate names,
        or None when the component is excluded or nothing matches.
        '''

        logging.debug(u'fuzzy match {}'.format(
            to_text(to_bytes(component, 'ascii', 'ignore'), 'ascii')))

        # "core" is never a module
        if component.lower() == u'core':
            return None

        # https://github.com/ansible/ansible/issues/18179
        if u'validate-modules' in component:
            return None

        # https://github.com/ansible/ansible/issues/20368
        if u'module_utils' in component:
            return None

        # a module that does not exist yet cannot be matched
        if u'new module' in component:
            return None

        # authorized_keys vs. authorized_key -- retry lookup with the
        # trailing 's' stripped off
        if component and component.endswith(u's'):
            tm = self.find_match(component[:-1])
            if tm:
                if not isinstance(tm, list):
                    return tm[u'name']
                elif len(tm) == 1:
                    return tm[0][u'name']
                else:
                    # ambiguous singular match: drop into the debugger when
                    # breakpoints are enabled instead of guessing
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb
                        epdb.st()

        match = None
        known_modules = []

        # collect every known module name except 'include'
        for k, v in six.iteritems(self.modules):
            if v[u'name'] in [u'include']:
                continue
            known_modules.append(v[u'name'])

        # normalize the title before substring matching
        title = title.lower()
        title = title.replace(u':', u'')
        # strongest title signal: "<module> module" appears verbatim
        title_matches = [x for x in known_modules if x + u' module' in title]

        if not title_matches:
            # weaker signals: title starts with the module name, or the
            # name appears as a standalone word in the title
            title_matches = [
                x for x in known_modules if title.startswith(x + u' ')
            ]
            if not title_matches:
                title_matches = \
                    [x for x in known_modules if u' ' + x + u' ' in title]

            if title_matches:
                # 'at' is a real module but far too common as an English word
                title_matches = [x for x in title_matches if x != u'at']

        # don't do singular word matching in title for ansible/ansible
        cmatches = None
        if component:
            cmatches = [x for x in known_modules if x in component]
            # drop names that only matched as the suffix of a longer module
            # name (e.g. 'user' inside something like 'foo_user')
            cmatches = [x for x in cmatches if not u'_' + x in component]

        # globs
        if not cmatches and u'*' in component:
            fmatches = [
                x for x in known_modules if fnmatch.fnmatch(x, component)
            ]
            if fmatches:
                cmatches = fmatches[:]

        if title_matches:
            # keep only component matches corroborated by the title
            cmatches = [
                x for x in cmatches if x in title_matches and x not in [u'at']
            ]

        if cmatches:
            if len(cmatches) >= 1 and (u'*' not in component
                                       and u'modules' not in component):
                match = cmatches[0]
            else:
                match = cmatches[:]
            if not match:
                # NOTE(review): both branches are no-ops; looks like leftover
                # scaffolding for docs.ansible.com handling -- confirm intent
                if u'docs.ansible.com' in component:
                    pass
                else:
                    pass
            logging.debug("module - component matches: %s" % cmatches)

        if not match:
            # fall back to an unambiguous title-only match
            if len(title_matches) == 1:
                match = title_matches[0]
            else:
                logging.debug("module - title matches: %s" % title_matches)

        return match
Ejemplo n.º 15
0
def write_gzip_json_file(path, data):
    """Serialize *data* as JSON and write it gzip-compressed to *path*."""
    payload = to_bytes(json.dumps(data))
    with gzip.open(path, 'w') as fileobj:
        fileobj.write(payload)
Ejemplo n.º 16
0
    def fuzzy_match(self, repo=None, title=None, component=None):
        '''Fuzzy matching for modules

        Tries to resolve the free-form ``component`` string (with help from
        the issue ``title``) to one or more known module names from
        ``self.modules``.  Returns a single name, a list of candidate names,
        or None when the component is excluded or nothing matches.
        '''

        logging.debug(u'fuzzy match {}'.format(
            to_text(to_bytes(component, 'ascii', 'ignore'), 'ascii'))
        )

        # "core" is never a module
        if component.lower() == u'core':
            return None

        # https://github.com/ansible/ansible/issues/18179
        if u'validate-modules' in component:
            return None

        # https://github.com/ansible/ansible/issues/20368
        if u'module_utils' in component:
            return None

        # a module that does not exist yet cannot be matched
        if u'new module' in component:
            return None

        # authorized_keys vs. authorized_key -- retry lookup with the
        # trailing 's' stripped off
        if component and component.endswith(u's'):
            tm = self.find_match(component[:-1])
            if tm:
                if not isinstance(tm, list):
                    return tm[u'name']
                elif len(tm) == 1:
                    return tm[0][u'name']
                else:
                    # ambiguous singular match: drop into the debugger when
                    # breakpoints are enabled instead of guessing
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb; epdb.st()

        match = None
        known_modules = []

        # collect every known module name except 'include'
        for k, v in six.iteritems(self.modules):
            if v[u'name'] in [u'include']:
                continue
            known_modules.append(v[u'name'])

        # normalize the title before substring matching
        title = title.lower()
        title = title.replace(u':', u'')
        # strongest title signal: "<module> module" appears verbatim
        title_matches = [x for x in known_modules if x + u' module' in title]

        if not title_matches:
            # weaker signals: title starts with the module name, or the
            # name appears as a standalone word in the title
            title_matches = [x for x in known_modules
                             if title.startswith(x + u' ')]
            if not title_matches:
                title_matches = \
                    [x for x in known_modules if u' ' + x + u' ' in title]

            if title_matches:
                # 'at' is a real module but far too common as an English word
                title_matches = [x for x in title_matches if x != u'at']

        # don't do singular word matching in title for ansible/ansible
        cmatches = None
        if component:
            cmatches = [x for x in known_modules if x in component]
            # drop names that only matched as the suffix of a longer module
            # name (e.g. 'user' inside something like 'foo_user')
            cmatches = [x for x in cmatches if not u'_' + x in component]

        # globs
        if not cmatches and u'*' in component:
            fmatches = [x for x in known_modules if fnmatch.fnmatch(x, component)]
            if fmatches:
                cmatches = fmatches[:]

        if title_matches:
            # keep only component matches corroborated by the title
            cmatches = [x for x in cmatches if x in title_matches and x not in [u'at']]

        if cmatches:
            if len(cmatches) >= 1 and (u'*' not in component and u'modules' not in component):
                match = cmatches[0]
            else:
                match = cmatches[:]
            if not match:
                # NOTE(review): both branches are no-ops; looks like leftover
                # scaffolding for docs.ansible.com handling -- confirm intent
                if u'docs.ansible.com' in component:
                    pass
                else:
                    pass
            logging.debug("module - component matches: %s" % cmatches)

        if not match:
            # fall back to an unambiguous title-only match
            if len(title_matches) == 1:
                match = title_matches[0]
            else:
                logging.debug("module - title matches: %s" % title_matches)

        return match
Ejemplo n.º 17
0
    def get_summaries(self, owner, repo, otype='issues', last=None, first='first: 100', states='states: OPEN', paginate=True):
        """Collect all the summary data for issues or pullrequests

        Args:
            owner     (str): the github namespace
            repo      (str): the github repository
            otype     (str): issues or pullRequests
            first     (str): number of nodes per page, oldest to newest
            last      (str): number of nodes per page, newest to oldest
            states    (str): open or closed issues
            paginate (bool): recurse through page results

        Returns:
            list: every node collected across all fetched pages.
        """

        templ = self.environment.from_string(QUERY_TEMPLATE)

        # after: "$endCursor" -- pagination cursor; None on the first page
        after = None

        '''
        # first: 100
        first = 'first: 100'
        # states: OPEN
        states = 'states: OPEN'
        '''

        nodes = []
        pagecount = 0
        while True:
            logging.debug(u'%s/%s %s pagecount:%s nodecount: %s' %
                          (owner, repo, otype, pagecount, len(nodes)))

            # drop unset (None) params before joining into the query args
            issueparams = u', '.join([x for x in [states, first, last, after] if x])
            query = templ.render(OWNER=owner, REPO=repo, OBJECT_TYPE=otype, OBJECT_PARAMS=issueparams, FIELDS=QUERY_FIELDS)

            # NOTE(review): to_bytes() yields bytes and json.dumps() cannot
            # serialize bytes on python3 -- verify this path runs under py3
            payload = {
                u'query': to_bytes(query, 'ascii', 'ignore').strip(),
                u'variables': u'{}',
                u'operationName': None
            }
            rr = requests.post(self.baseurl, headers=self.headers, data=json.dumps(payload))
            # stop on any HTTP error or empty response body
            if not rr.ok:
                break
            data = rr.json()
            if not data:
                break

            # keep each edge/node/issue
            for edge in data[u'data'][u'repository'][otype][u'edges']:
                node = edge[u'node']
                # otype is plural ('issues'/'pullRequests'); pass along the
                # singular form as the node type
                self.update_node(node, otype.lower()[:-1], owner, repo)
                nodes.append(node)

            if not paginate:
                break

            # stop when the API reports no further pages
            pageinfo = data.get(u'data', {}).get(u'repository', {}).get(otype, {}).get(u'pageInfo')
            if not pageinfo:
                break
            if not pageinfo.get(u'hasNextPage'):
                break

            after = u'after: "%s"' % pageinfo[u'endCursor']
            pagecount += 1

        return nodes