Example #1
0
 def setUpClass(cls):
     """Create the test connectors and load the shared fixtures.

     Two connectors are opened on the 'repoxplorertest' index (the second
     one with the 'projects' index suffix), COMMITS is indexed through the
     first one and the value returned by set_projects_definition is kept
     on the class as ``db``.
     """
     test_index = 'repoxplorertest'
     cls.con = index.Connector(index=test_index)
     cls.conp = index.Connector(index=test_index, index_suffix='projects')
     Commits(cls.con).add_commits(COMMITS)
     cls.db = set_projects_definition(cls.conp)
Example #2
0
    def authors(self,
                pid=None,
                tid=None,
                cid=None,
                gid=None,
                dfrom=None,
                dto=None,
                inc_merge_commit=None,
                inc_repos=None,
                metadata=None,
                exc_groups=None,
                inc_groups=None):
        """Return the authors histogram matching the given filters.

        The filters are turned into query arguments by
        utils.resolv_filters. An empty list is returned when no commit
        matches. Otherwise every histogram bucket is rewritten in place so
        that it exposes a 'date' (the bucket's 'key_as_string') and a
        'value' (the number of idents found for the bucket's author
        emails), and the raw aggregation keys are removed.
        """
        projects = Projects()
        contributors = Contributors()

        filters = utils.resolv_filters(
            projects, contributors, pid, tid, cid, gid,
            dfrom, dto, inc_repos, inc_merge_commit,
            metadata, exc_groups, inc_groups)

        commits_index = Commits(index.Connector())
        if not commits_index.get_commits_amount(**filters):
            return []

        histogram = commits_index.get_authors_histo(**filters)[1]
        for entry in histogram:
            found = contributors.get_idents_by_emails(entry['authors_email'])
            entry['value'] = len(found)
            entry['date'] = entry['key_as_string']
            # Drop the raw aggregation fields, in the same order as before
            for raw_key in ('authors_email', 'doc_count',
                            'key_as_string', 'key'):
                del entry[raw_key]

        return histogram
Example #3
0
    def contributor(self, cid=None):
        """Return the name, emails amount and gravatar of a contributor.

        cid is the encrypted contributor id; it is decrypted with
        utils.decrypt. A 404 is raised through abort() when cid is missing
        or cannot be decrypted.
        """
        if not cid:
            abort(404, detail="No contributor specified")

        commits_index = Commits(index.Connector())
        contributors = Contributors()

        try:
            cid = utils.decrypt(xorkey, cid)
        except Exception:
            abort(404, detail="The cid is incorrectly formated")

        _unused, ident = contributors.get_ident_by_id(cid)
        if not ident:
            # No ident has been declared for that contributor
            by_email = contributors.get_idents_by_emails(cid)
            ident = list(by_email.values())[0]
        mails = ident['emails']
        name = ident['name']
        if not name:
            raw_names = commits_index.get_commits_author_name_by_emails(
                [cid])
            # TODO: get_commits_author_name_by_emails must
            # support look by committer email too
            name = raw_names[cid] if cid in raw_names else 'Unnamed'

        gravatar = hashlib.md5(
            ident['default-email'].encode(errors='ignore')).hexdigest()
        return {
            'name': name,
            'mails_amount': len(mails),
            'gravatar': gravatar,
        }
Example #4
0
    def setUpClass(cls):
        """Load the commit fixtures and an inline projects definition.

        The projects definition declares the 'test' and 'test2' projects,
        both holding a 'monkey' repository built from the 'default'
        template; it is passed to set_projects_definition and the result
        is stored on the class as ``db``.
        """
        test_index = 'repoxplorertest'
        cls.con = index.Connector(index=test_index)
        cls.conp = index.Connector(index=test_index,
                                   index_suffix='projects')
        Commits(cls.con).add_commits(COMMITS)
        definition = """
        project-templates:
          default:
            uri: https://github.com/nakata/%(name)s.git
            branches:
            - master

        projects:
          test:
            repos:
              monkey:
                template: default
          test2:
            repos:
              monkey:
                template: default
                tags:
                  - python
        """
        cls.db = set_projects_definition(cls.conp, definition)
Example #5
0
    def contributor(self, cid=None):
        """Describe one contributor identified by its encrypted id.

        Responds with a 404 (through abort) when cid is empty or cannot be
        decrypted. The returned dict holds the contributor 'name', the
        number of known emails ('mails_amount') and the MD5 hex digest of
        the default email ('gravatar').
        """
        if not cid:
            abort(404,
                  detail="No contributor specified")

        commits = Commits(index.Connector())
        known_idents = Contributors()

        try:
            cid = utils.decrypt(xorkey, cid)
        except Exception:
            abort(404,
                  detail="The cid is incorrectly formated")

        _ignored, ident = known_idents.get_ident_by_id(cid)
        if not ident:
            # No ident has been declared for that contributor
            candidates = known_idents.get_idents_by_emails(cid)
            ident = list(candidates.values())[0]
        mails = ident['emails']
        name = ident['name']
        if not name:
            # TODO: get_commits_author_name_by_emails must
            # support look by committer email too
            name = 'Unnamed'
            raw_names = commits.get_commits_author_name_by_emails([cid])
            if cid in raw_names:
                name = raw_names[cid]

        default_email = ident['default-email']
        infos = {
            'name': name,
            'mails_amount': len(mails),
            'gravatar': hashlib.md5(
                default_email.encode(errors='ignore')).hexdigest(),
        }
        return infos
Example #6
0
def process_commits(options):
    """Extract the description of a batch of commits and index it.

    options is unpacked as (path, ref_id, shas): the commits descriptions
    are read with get_commits_desc(path, shas), converted with
    process_commits_desc_output and added to the commits index.
    """
    path, ref_id, shas = options
    commits_index = Commits(index.Connector())
    worker = mp.current_process()
    logger.info("Worker %s started to extract and index %s commits" %
                (worker, len(shas)))
    raw_desc = get_commits_desc(path, shas)
    parsed = process_commits_desc_output(raw_desc, ref_id)
    commits_index.add_commits(parsed)
Example #7
0
 def setUpClass(cls):
     """Open the test connectors and index the commit fixtures.

     ``con`` targets the 'repoxplorertest' index and ``conp`` the same
     index with the 'projects' suffix; ``db`` holds what
     set_projects_definition returns for ``conp``.
     """
     cls.con = index.Connector(index='repoxplorertest')
     projects_con = index.Connector(
         index='repoxplorertest', index_suffix='projects')
     cls.conp = projects_con
     commits_index = Commits(cls.con)
     commits_index.add_commits(COMMITS)
     cls.db = set_projects_definition(projects_con)
Example #8
0
 def setUpClass(cls):
     """Load the commit fixtures, the projects definition and two tags.

     Besides the commits and projects connectors, a third connector with
     the 'tags' index suffix is used to index 'tag1' and 'tag2', both
     attached to the monkey repository.
     """
     test_index = 'repoxplorertest'
     cls.con = index.Connector(index=test_index)
     cls.conp = index.Connector(index=test_index, index_suffix='projects')
     Commits(cls.con).add_commits(COMMITS)
     cls.db = set_projects_definition(cls.conp)

     tags_index = Tags(
         index.Connector(index=test_index, index_suffix='tags'))
     monkey_repo = 'https://github.com/nakata/monkey.git:monkey'
     tags_index.add_tags([
         {
             'sha': '3597334f2cb10772950c97ddf2f6cc17b184',
             'date': 1410456005,
             'repo': monkey_repo,
             'name': 'tag1',
         },
         {
             'sha': '3597334f2cb10772950c97ddf2f6cc17b1845',
             'date': 1410456005,
             'repo': monkey_repo,
             'name': 'tag2',
         },
     ])
Example #9
0
    def setUpClass(cls):
        """Index the commit fixtures and register two test projects.

        The inline YAML text below is handed to set_projects_definition
        together with the 'projects' connector; the returned value is
        stored on the class as ``db``.
        """
        cls.con = index.Connector(index='repoxplorertest')
        projects_con = index.Connector(index='repoxplorertest',
                                       index_suffix='projects')
        cls.conp = projects_con
        commits_index = Commits(cls.con)
        commits_index.add_commits(COMMITS)
        yaml_definition = """
        project-templates:
          default:
            uri: https://github.com/nakata/%(name)s.git
            branches:
            - master

        projects:
          test:
            repos:
              monkey:
                template: default
          test2:
            repos:
              monkey:
                template: default
                tags:
                  - python
        """
        cls.db = set_projects_definition(projects_con, yaml_definition)
Example #10
0
    def metadata(self, key=None, pid=None, tid=None, cid=None, gid=None,
                 dfrom=None, dto=None, inc_merge_commit=None,
                 inc_repos=None, exc_groups=None, inc_groups=None):
        """Return the metadata keys, or the values of one metadata key.

        Without ``key`` the result of Commits.get_metadata_keys is
        returned; with ``key`` the result of
        Commits.get_metadata_key_values for that key. Both are restricted
        by the filters resolved through utils.resolv_filters.
        """
        commits_index = Commits(index.Connector())
        projects = Projects()
        contributors = Contributors()

        filters = utils.resolv_filters(
            projects, contributors, pid, tid, cid, gid,
            dfrom, dto, inc_repos, inc_merge_commit,
            None, exc_groups, inc_groups)
        # The metadata filter is not forwarded to the metadata queries
        filters.pop('metadata')

        if key:
            return commits_index.get_metadata_key_values(key, **filters)
        return commits_index.get_metadata_keys(**filters)
Example #11
0
    def authors(self, pid=None, tid=None, cid=None, gid=None,
                dfrom=None, dto=None, inc_merge_commit=None,
                inc_repos=None, metadata=None, exc_groups=None,
                inc_groups=None):
        """Build the per-date histogram of distinct authors.

        Returns [] when the resolved filters match no commit. Otherwise
        returns the buckets of Commits.get_authors_histo, each one updated
        in place to carry 'date' and 'value' instead of the raw
        'authors_email', 'doc_count', 'key_as_string' and 'key' entries.
        """
        projects_idx = Projects()
        contributors_idx = Contributors()

        kwargs = utils.resolv_filters(
            projects_idx, contributors_idx, pid, tid, cid, gid, dfrom, dto,
            inc_repos, inc_merge_commit, metadata, exc_groups, inc_groups)

        commits_idx = Commits(index.Connector())
        if not commits_idx.get_commits_amount(**kwargs):
            return []

        buckets = commits_idx.get_authors_histo(**kwargs)[1]
        for item in buckets:
            matched = contributors_idx.get_idents_by_emails(
                item['authors_email'])
            item['value'] = len(matched)
            item['date'] = item['key_as_string']
            # Remove the raw fields once the public ones are set
            for obsolete in ('authors_email', 'doc_count',
                             'key_as_string', 'key'):
                del item[obsolete]

        return buckets
Example #12
0
 def __init__(self, name, uri, parsers=None, con=None, config=None):
     """Initialise the indexer state for the repository ``name``/``uri``.

     config, when given, is applied with configuration.set_config before
     anything else. con is the connector to use; a default
     index.Connector() is created when it is not provided. The git store
     directory and the repository's local directory are created when they
     do not exist yet.
     """
     if config:
         configuration.set_config(config)
     self.con = con if con else index.Connector()
     self.c = Commits(self.con)
     self.t = Tags(self.con)
     if not os.path.isdir(conf.git_store):
         os.makedirs(conf.git_store)
     self.name = name
     self.uri = uri
     self.base_id = '%s:%s' % (self.uri, self.name)
     self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
     self.parsers = parsers if parsers else []
     self.parsers_compiled = False
     # Local directory of the repository inside the git store
     flattened_uri = self.uri.replace('/', '_')
     self.local = os.path.join(conf.git_store, self.name, flattened_uri)
     if not os.path.isdir(self.local):
         os.makedirs(self.local)
     self.credentials_helper_path = os.path.join(
         sys.prefix, 'bin', 'repoxplorer-git-credentials-helper')
 def setUpClass(cls):
     """Prepare the commits, projects and tags fixtures for the tests.

     Commits are indexed through ``con``, the projects definition through
     ``conp`` and two tags ('tag1', 'tag2') through a connector using the
     'tags' index suffix.
     """
     cls.con = index.Connector(index='repoxplorertest')
     projects_con = index.Connector(
         index='repoxplorertest', index_suffix='projects')
     cls.conp = projects_con
     commits_index = Commits(cls.con)
     commits_index.add_commits(COMMITS)
     cls.db = set_projects_definition(projects_con)

     tags_con = index.Connector(
         index='repoxplorertest', index_suffix='tags')
     tags_index = Tags(tags_con)
     first_tag = {
         'sha': '3597334f2cb10772950c97ddf2f6cc17b184',
         'date': 1410456005,
         'repo': 'https://github.com/nakata/monkey.git:monkey',
         'name': 'tag1',
     }
     second_tag = {
         'sha': '3597334f2cb10772950c97ddf2f6cc17b1845',
         'date': 1410456005,
         'repo': 'https://github.com/nakata/monkey.git:monkey',
         'name': 'tag2',
     }
     tags_index.add_tags([first_tag, second_tag])
Example #14
0
    def commits(self, pid=None, tid=None, cid=None, gid=None,
                dfrom=None, dto=None, inc_merge_commit=None,
                inc_repos=None, metadata=None, exc_groups=None,
                inc_groups=None):
        """Return the commits histogram matching the given filters.

        The result is a list of {'date': ..., 'value': ...} dicts built
        from the 'key_as_string' and 'doc_count' of each bucket returned
        by Commits.get_commits_histo, or [] when no commit matches.
        """
        projects = Projects()
        contributors = Contributors()

        filters = utils.resolv_filters(
            projects, contributors, pid, tid, cid, gid, dfrom, dto,
            inc_repos, inc_merge_commit, metadata, exc_groups, inc_groups)

        commits_index = Commits(index.Connector())
        if not commits_index.get_commits_amount(**filters):
            return []

        histogram = commits_index.get_commits_histo(**filters)
        points = []
        for bucket in histogram[1]:
            points.append({
                'date': bucket['key_as_string'],
                'value': bucket['doc_count'],
            })
        return points
Example #15
0
    def commits(self, pid=None, tid=None, cid=None, gid=None,
                start=0, limit=10,
                dfrom=None, dto=None, inc_merge_commit=None,
                inc_repos=None, metadata=None, exc_groups=None,
                inc_groups=None):
        """Return a page of commits decorated for display.

        The filters are resolved with utils.resolv_filters and completed
        with the ``start``/``limit`` paging arguments before calling
        Commits.get_commits. Every commit found in the third element of
        the response is then updated in place:

        - 'metadata' lists the keys that are not part of PROPERTIES;
        - 'repos' loses its 'meta_ref: ' entries and its URI part;
        - 'gitwebs' and 'projects' are computed from the repos;
        - author/committer emails are replaced by the ident default
          email, hashed into gravatars and encrypted into 'cid'/'ccid'
          before being removed;
        - 'ttl' becomes a human readable duration;
        - 'commit_msg' is shortened when longer than 80 characters.

        The (mutated) response of Commits.get_commits is returned.
        """
        # Imported locally: only the ``datetime`` class is known to be
        # available in this module.
        from datetime import timedelta

        c = Commits(index.Connector())
        projects_index = Projects()
        idents = Contributors()

        query_kwargs = utils.resolv_filters(
            projects_index, idents, pid, tid, cid, gid,
            dfrom, dto, inc_repos, inc_merge_commit,
            metadata, exc_groups, inc_groups)
        query_kwargs.update(
            {'start': start, 'limit': limit})

        resp = c.get_commits(**query_kwargs)

        for cmt in resp[2]:
            # Get extra metadata keys
            extra = set(cmt.keys()) - set(PROPERTIES.keys())
            cmt['metadata'] = list(extra)
            cmt['repos'] = [r for r in cmt['repos']
                            if not r.startswith('meta_ref: ')]
            # Compute link to access commit diff based on the
            # URL template provided in projects.yaml
            cmt['gitwebs'] = [
                projects_index.get_gitweb_link(r) %
                {'sha': cmt['sha']} for r in cmt['repos']]
            cmt['projects'] = utils.get_projects_from_references(
                projects_index, cmt['repos'])
            # Also remove the URI part
            cmt['repos'] = [":".join(p.split(':')[-2:]) for
                            p in cmt['repos']]
            # Request the ident index to fetch author/committer name/email
            for elm in ('author', 'committer'):
                ident = list(idents.get_idents_by_emails(
                    cmt['%s_email' % elm]).values())[0]
                cmt['%s_email' % elm] = ident['default-email']
                if ident['name']:
                    cmt['%s_name' % elm] = ident['name']
            # Convert the TTL to something human readable. A timedelta is
            # built directly from the seconds: subtracting two naive
            # local datetimes is off by the DST offset whenever the two
            # instants are on different sides of a DST change.
            cmt['ttl'] = str(timedelta(seconds=cmt['ttl']))
            cmt['author_gravatar'] = \
                hashlib.md5(cmt['author_email'].encode(
                    errors='ignore')).hexdigest()
            cmt['committer_gravatar'] = \
                hashlib.md5(cmt['committer_email'].encode(
                    errors='ignore')).hexdigest()
            if len(cmt['commit_msg']) > 80:
                cmt['commit_msg'] = cmt['commit_msg'][0:76] + '...'
            # Add cid and ccid
            cmt['cid'] = utils.encrypt(xorkey, cmt['author_email'])
            cmt['ccid'] = utils.encrypt(xorkey, cmt['committer_email'])
            # Remove email details
            del cmt['author_email']
            del cmt['committer_email']
        return resp
Example #16
0
    def index(self, prefix=None, nameonly='false', withstats='false'):
        """Return the contributor groups, optionally filtered by prefix.

        prefix: when set, only the groups whose name starts with it are
            kept; the comparison ignores case.
        nameonly: the string 'true' makes the method return a mapping of
            group name to None instead of the detailed description.
        withstats: the string 'true' adds 'projects_amount' and
            'repos_amount' to each detailed group.

        The detailed result maps each group name to a dict with its
        'members' (keyed by encrypted ident id), 'description' and
        'domains'.
        """
        ci = Commits(index.Connector())
        contributors_index = Contributors()
        groups = contributors_index.get_groups()
        # Lower-case the prefix once so that the name-only listing and the
        # detailed listing filter group names the same way.
        lowered_prefix = prefix.lower() if prefix else None
        if nameonly == 'true':
            return {
                name: None for name in groups.keys()
                if not lowered_prefix
                or name.lower().startswith(lowered_prefix)}
        ret_groups = {}
        for group, data in groups.items():
            if lowered_prefix and not group.lower().startswith(
                    lowered_prefix):
                continue
            rg = {'members': {},
                  'description': data.get('description', ''),
                  'domains': data.get('domains', [])}
            emails = list(data['emails'].keys())
            members = contributors_index.get_idents_by_emails(emails)
            for member_id, member in members.items():
                default_email = member['default-email']
                # hashlib only accepts bytes: encode text, keep bytes as is
                if isinstance(default_email, bytes):
                    email_bytes = default_email
                else:
                    email_bytes = default_email.encode(errors='ignore')
                member['gravatar'] = hashlib.md5(email_bytes).hexdigest()
                # TODO(fbo): bounces should be a list of bounce
                # Let's deactivate that for now
                # member['bounces'] = bounces
                del member['emails']
                if not member['name']:
                    # Try to find it among commits
                    suggested = ci.get_commits_author_name_by_emails(
                        [default_email])
                    member['name'] = suggested.get(default_email,
                                                   'Unknown name')
                del member['default-email']
                rg['members'][utils.encrypt(xorkey, member_id)] = member

            if withstats == 'true':
                # TODO(fbo): This endpoint needs to handle some filters like
                # dates bounces to return more accurate stats

                # Fetch the number of projects and repos contributed to
                p_filter = {}
                query_kwargs = {
                    'mails': data['emails'],
                    'merge_commit': False,
                    'repos': p_filter,
                }
                projects = Projects()
                tops_ctl = tops.TopProjectsController()
                top_projects = tops_ctl.gbycommits(
                    ci, projects, query_kwargs, False)
                top_repos = tops_ctl.gbycommits(
                    ci, projects, query_kwargs, True)
                rg['projects_amount'] = len(top_projects)
                rg['repos_amount'] = len(top_repos)

            ret_groups[group] = rg

        return ret_groups
Example #17
0
 def setUp(self):
     """Run the base set-up, then index the commit fixtures.

     The connectors are stored on the instance as ``con`` and ``conp``
     and the result of set_projects_definition as ``db``.
     """
     FunctionalTest.setUp(self)
     test_index = 'repoxplorertest'
     self.con = index.Connector(index=test_index)
     self.conp = index.Connector(index=test_index,
                                 index_suffix='projects')
     Commits(self.con).add_commits(COMMITS)
     self.db = set_projects_definition(self.conp)
Example #18
0
 def setUp(self):
     """Prepare the functional test: connectors, commits and projects.

     FunctionalTest.setUp runs first; COMMITS is then indexed through
     ``con`` and the projects definition is set through ``conp``.
     """
     FunctionalTest.setUp(self)
     self.con = index.Connector(index='repoxplorertest')
     projects_con = index.Connector(
         index='repoxplorertest', index_suffix='projects')
     self.conp = projects_con
     commits_index = Commits(self.con)
     commits_index.add_commits(COMMITS)
     self.db = set_projects_definition(projects_con)
Example #19
0
def process_commits(options):
    """Extract the description of a batch of commits and index it.

    options is unpacked as (path, ref_ids, shas). ref_ids may be a single
    value or a list; a single value is wrapped into a one-element list
    before being passed to process_commits_desc_output.
    """
    path, ref_ids, shas = options
    ref_ids = ref_ids if isinstance(ref_ids, list) else [ref_ids]
    commits_index = Commits(index.Connector())
    worker = mp.current_process()
    logger.info("Worker %s started to extract and index %s commits" % (
        worker, len(shas)))
    raw_desc = get_commits_desc(path, shas)
    parsed = process_commits_desc_output(raw_desc, ref_ids)
    commits_index.add_commits(parsed)
Example #20
0
    def index(self, prefix=None, nameonly='false', withstats='false',
              pid=None, dfrom=None, dto=None, inc_merge_commit=None):
        """Return the contributor groups, optionally filtered by prefix.

        prefix: when set, only the groups whose name starts with it are
            kept; the comparison ignores case.
        nameonly: the string 'true' makes the method return a mapping of
            group name to None instead of the detailed description.
        withstats: the string 'true' adds 'repos_amount' and
            'projects_amount' to each detailed group, computed with the
            pid, dfrom, dto and inc_merge_commit filters.

        The detailed result maps each group name to a dict with its
        'members' (keyed by encrypted ident id), 'description' and
        'domains'.
        """
        ci = Commits(index.Connector())
        contributors_index = Contributors()
        groups = contributors_index.get_groups()
        if withstats == 'true':
            projects_index = Projects()
        # Lower-case the prefix once so that the name-only listing and the
        # detailed listing filter group names the same way.
        lowered_prefix = prefix.lower() if prefix else None
        if nameonly == 'true':
            return {
                name: None for name in groups.keys()
                if not lowered_prefix
                or name.lower().startswith(lowered_prefix)}
        ret_groups = {}
        for group, data in groups.items():
            if lowered_prefix and not group.lower().startswith(
                    lowered_prefix):
                continue
            rg = {'members': {},
                  'description': data.get('description', ''),
                  'domains': data.get('domains', [])}
            emails = list(data['emails'].keys())
            members = contributors_index.get_idents_by_emails(emails)
            for member_id, member in members.items():
                member['gravatar'] = hashlib.md5(
                    member['default-email'].encode(
                        errors='ignore')).hexdigest()
                # TODO(fbo): bounces should be a list of bounce
                # Let's deactivate that for now
                # member['bounces'] = bounces
                del member['emails']
                if not member['name']:
                    # Try to find it among commits
                    suggested = ci.get_commits_author_name_by_emails(
                        [member['default-email']])
                    name = suggested.get(member['default-email'],
                                         'Unnamed')
                    member['name'] = name
                del member['default-email']
                rg['members'][utils.encrypt(xorkey, member_id)] = member

            if withstats == 'true':
                # Fetch the number of projects and repos contributed to
                query_kwargs = utils.resolv_filters(
                    projects_index, contributors_index, pid, None, None, group,
                    dfrom, dto, None, inc_merge_commit, None, None, None)

                repos = [r for r in ci.get_repos(**query_kwargs)[1]
                         if not r.startswith('meta_ref: ')]
                projects = utils.get_projects_from_references(
                    projects_index, repos)
                rg['repos_amount'] = len(repos)
                rg['projects_amount'] = len(projects)

            ret_groups[group] = rg

        return ret_groups
Example #21
0
 def __init__(self, projects, con=None, config=None):
     """Store the projects and set up the commits index access.

     config, when given, is applied with configuration.set_config first.
     A default index.Connector() is created when con is not provided.
     """
     if config:
         configuration.set_config(config)
     self.con = con if con else index.Connector()
     self.projects = projects
     self.c = Commits(self.con)
     self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
Example #22
0
 def __init__(self, projects, con=None):
     """Store the projects and set up the commits and tags index access.

     A default index.Connector() is created when con is not provided.
     ``current_base_ids`` starts as an empty set.
     """
     self.con = con if con else index.Connector()
     self.projects = projects
     self.c = Commits(self.con)
     self.t = Tags(self.con)
     self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
     self.current_base_ids = set()
Example #23
0
    def __init__(self, name, uri, parsers=None, con=None, meta_ref=None):
        """Initialise the indexer state for the repository ``name``/``uri``.

        name, uri: identify the repository; they are combined into
            ``base_id`` as '<uri>:<name>'.
        parsers: optional list of parsers; defaults to an empty list.
        con: connector used for the commits index; a default
            index.Connector() is created when it is not provided.
        meta_ref: when set, stored as 'meta_ref: <meta_ref>' in
            ``self.meta_ref``; otherwise ``self.meta_ref`` is None.

        The git store directory and the repository's local directory are
        created when missing, and the path of the git credentials helper
        is resolved (it is left to None when no helper is found).
        """
        if not con:
            self.con = index.Connector()
        else:
            self.con = con
        self.c = Commits(self.con)
        # Tags use their own connector: same index, 'tags' suffix
        self.t = Tags(
            index.Connector(index=self.con.index, index_suffix='tags'))
        if not os.path.isdir(conf.git_store):
            os.makedirs(conf.git_store)
        self.name = name
        self.uri = uri
        self.base_id = '%s:%s' % (self.uri, self.name)
        self.seen_refs_path = os.path.join(conf.db_cache_path,
                                           SEEN_REFS_CACHED)
        if meta_ref:
            self.meta_ref = 'meta_ref: %s' % meta_ref
        else:
            self.meta_ref = None

        if not parsers:
            self.parsers = []
        else:
            self.parsers = parsers
        self.parsers_compiled = False
        # Local directory of the repository inside the git store; the
        # slashes of the URI are replaced to get a single path component
        self.local = os.path.join(conf.git_store, self.name,
                                  self.uri.replace('/', '_'))
        if not os.path.isdir(self.local):
            os.makedirs(self.local)

        # First choice: the helper path set in the configuration, if any
        self.credentials_helper_path = getattr(conf,
                                               'git_credential_helper_path',
                                               None)
        # The configured value is only kept when it is an absolute path
        # pointing to an existing file
        if not (self.credentials_helper_path
                and self.credentials_helper_path.startswith('/')
                and os.path.isfile(self.credentials_helper_path)):
            if self.credentials_helper_path:
                logger.warning(
                    'Configured git_credential_helper %s not found' %
                    (self.credentials_helper_path))
            self.credentials_helper_path = None
        # Look at the default installation paths
        if not self.credentials_helper_path:
            self.credentials_helper_path = os.path.join(
                sys.prefix, 'bin', 'repoxplorer-git-credentials-helper')
            # Not under sys.prefix: search the command on the PATH
            if not os.path.isfile(self.credentials_helper_path):
                self.credentials_helper_path = shutil.which(
                    'repoxplorer-git-credentials-helper')
            if not self.credentials_helper_path:
                logger.warning(
                    'Default repoxplorer-git-credential-helper command '
                    'not found')
Example #24
0
    def bycommits(self, pid=None, tid=None, cid=None, gid=None,
                  dfrom=None, dto=None, inc_merge_commit=None,
                  inc_repos=None, metadata=None, exc_groups=None,
                  inc_repos_detail=None, inc_groups=None):
        """Resolve the request filters and delegate to self.gbycommits.

        inc_repos_detail is forwarded unchanged as the last argument of
        gbycommits; all the other parameters are turned into query
        arguments by utils.resolv_filters.
        """
        commits_index = Commits(index.Connector())
        projects = Projects()
        contributors = Contributors()

        filters = utils.resolv_filters(
            projects, contributors, pid, tid, cid, gid, dfrom, dto,
            inc_repos, inc_merge_commit, metadata, exc_groups, inc_groups)

        return self.gbycommits(
            commits_index, projects, filters, inc_repos_detail)
Example #25
0
 def search_authors(self, query=""):
     """Search commit authors by name and return them sorted by name.

     The query is run against the 'author_name' field of the commits
     index. Idents are looked up for at most ``ret_limit`` of the author
     emails found. The result is an OrderedDict mapping the encrypted
     ident id to a dict holding the author 'name' and 'gravatar'; an
     empty dict is returned when the search has no hit.
     """
     ret_limit = 100
     commits_index = Commits(index.Connector())
     response = commits_index.es.search(
         index=commits_index.index,
         q=query,
         df="author_name",
         size=10000,
         default_operator="AND",
         _source_includes=["author_name", "author_email"])
     hits = response['hits']['hits']
     if len(hits) == 0:
         return {}

     contributors = Contributors()
     # email -> name, as recorded in the commits
     authors = {}
     for hit in hits:
         source = hit['_source']
         author_email = source['author_email']
         authors[author_email] = source['author_name']

     wanted_emails = list(authors.keys())[:ret_limit]
     found = contributors.get_idents_by_emails(wanted_emails)
     result = {}
     for iid, ident in found.items():
         email = ident['default-email']
         name = ident['name'] or authors[email]
         gravatar = hashlib.md5(email.encode(errors='ignore')).hexdigest()
         result[utils.encrypt(xorkey, iid)] = {
             'name': name,
             'gravatar': gravatar,
         }
     by_name = sorted(result.items(), key=lambda item: item[1]['name'])
     return OrderedDict(by_name)
Example #26
0
    def contributor(self, cid=None):
        """Return display information and statistics about a contributor.

        cid is the encrypted contributor id; a 404 is raised through
        abort() when it is missing or cannot be decrypted. The returned
        dict holds the contributor 'name', the number of known emails
        ('mails_amount'), the number of projects and repositories
        contributed to ('projects_amount', 'repos_amount') and the MD5
        hex digest of the default email ('gravatar').
        """
        if not cid:
            abort(404,
                  detail="No contributor specified")

        try:
            cid = utils.decrypt(xorkey, cid)
        except Exception:
            abort(404,
                  detail="The cid is incorrectly formated")
        c = Commits(index.Connector())
        idents = Contributors()
        projects = Projects()
        _, ident = idents.get_ident_by_id(cid)
        if not ident:
            # No ident has been declared for that contributor.
            # list() is needed because dict views cannot be indexed.
            ident = list(idents.get_idents_by_emails(cid).values())[0]
        mails = ident['emails']
        name = ident['name']
        if not name:
            raw_names = c.get_commits_author_name_by_emails([cid])
            if cid not in raw_names:
                # TODO: get_commits_author_name_by_emails must
                # support look by committer email too
                name = 'Unnamed'
            else:
                name = raw_names[cid]

        p_filter = {}
        query_kwargs = {
            'mails': mails,
            'merge_commit': False,
            'repos': p_filter,
        }

        tops_ctl = tops.TopProjectsController()
        top_projects = tops_ctl.gbycommits(c, projects, query_kwargs, False)
        top_repos = tops_ctl.gbycommits(c, projects, query_kwargs, True)

        # hashlib only accepts bytes: encode text, keep bytes as is
        default_email = ident['default-email']
        if not isinstance(default_email, bytes):
            default_email = default_email.encode(errors='ignore')

        infos = {}
        infos['name'] = name
        infos['mails_amount'] = len(mails)
        infos['projects_amount'] = len(top_projects)
        infos['repos_amount'] = len(top_repos)
        infos['gravatar'] = hashlib.md5(default_email).hexdigest()
        return infos
Example #27
0
class RefsCleaner():
    """Remove from the index the commits of refs that are no longer
    declared in the projects definition.

    The refs already seen by the indexer are read from (and written
    back to) the pickle file located at ``self.seen_refs_path``.
    """

    def __init__(self, projects, con=None, config=None):
        """
        :param projects: object exposing ``get_projects_raw()``.
        :param con: index connector; ``index.Connector()`` when omitted.
        :param config: optional configuration handed to
            ``configuration.set_config``.
        """
        if config:
            configuration.set_config(config)
        if not con:
            self.con = index.Connector()
        else:
            self.con = con
        self.projects = projects
        self.c = Commits(self.con)
        self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)

    def find_refs_to_clean(self):
        """Return the set of seen refs that are not declared anymore.

        As a side effect ``self.data`` is set to the content of the
        seen refs file (an empty set when the file is missing or
        unreadable).
        """
        prjs = self.projects.get_projects_raw()
        refs_ids = set()
        for pdata in prjs.values():
            for rid, repo in pdata['repos'].items():
                for branch in repo['branches']:
                    refs_ids.add('%s:%s:%s' % (repo['uri'], rid, branch))
        if not os.path.isfile(self.seen_refs_path):
            self.data = set()
        else:
            try:
                # NOTE: pickle must only be fed trusted data; this file
                # is expected to be the cache dumped by
                # remove_from_seen_refs.
                with open(self.seen_refs_path, 'rb') as seen_refs_file:
                    self.data = cPickle.load(seen_refs_file)
            except Exception:
                # Protect against corrupted file
                self.data = set()
        refs_to_clean = self.data - refs_ids
        logger.info("Found %s refs to clean." % len(refs_to_clean))
        return refs_to_clean

    def clean(self, refs):
        """Delete the commits attached to each ref of ``refs`` and drop
        the ref from the seen refs file.
        """
        # Do it by bulk of 10000 to not hurt memory
        bulk = 10000
        for ref in refs:
            # Find ref's Commits
            ids = [
                c['_id'] for c in self.c.get_commits(repos=[ref], scan=True)
            ]
            if not ids:
                self.remove_from_seen_refs(ref)
                continue
            logger.info("Ref %s no longer referenced. Cleaning %s cmts." %
                        (ref, len(ids)))
            for start in range(0, len(ids), bulk):
                delete_commits(self.c, ref, ids[start:start + bulk], ref)
            self.remove_from_seen_refs(ref)

    def remove_from_seen_refs(self, ref_id):
        """Remove ``ref_id`` from ``self.data`` and persist the result.

        Raises ``KeyError`` when ``ref_id`` is not in ``self.data``.
        """
        self.data.remove(ref_id)
        # The context manager guarantees the dump is flushed and the
        # file descriptor released.
        with open(self.seen_refs_path, 'wb') as seen_refs_file:
            cPickle.dump(self.data, seen_refs_file)
Example #28
0
    def diff(self, pid=None, tid=None, cid=None, gid=None,
             dfrom=None, dto=None, dfromref=None, dtoref=None,
             inc_merge_commit=None, inc_repos=None, metadata=None,
             exc_groups=None, limit=None, inc_groups=None):
        """Return the authors active in the ``dfrom``/``dto`` period
        that are absent from the ``dfromref``/``dtoref`` reference
        period.

        ``limit`` defaults to 10; a negative value disables truncation.
        ``abort(404, ...)`` is called when one of the four dates is
        missing.
        """
        if not (dfrom and dto):
            abort(404,
                  detail="Must specify dfrom and dto dates for the new "
                         "contributors")

        if not (dfromref and dtoref):
            abort(404,
                  detail="Must specify dfromref and dtoref dates for the "
                         "reference period to compute new contributors")

        commits_index = Commits(index.Connector())
        projects_index = Projects()
        idents = Contributors()

        def authors_between(start, end):
            # Resolve the filters for one period and fetch its authors
            filters = utils.resolv_filters(
                projects_index, idents, pid, tid, cid, gid,
                start, end, inc_repos, inc_merge_commit, metadata,
                exc_groups, inc_groups)
            return self.gbycommits(
                commits_index, idents, filters, top=-1,
                resolv_name=False, clean_email=False)

        # Contributors of the new period first, then the reference one
        authors_new = authors_between(dfrom, dto)
        authors_old = authors_between(dfromref, dtoref)

        # Keep only the cids that never appear in the reference period
        fresh_cids = {entry['cid'] for entry in authors_new}
        fresh_cids -= {entry['cid'] for entry in authors_old}
        authors_diff = [
            entry for entry in authors_new if entry['cid'] in fresh_cids]

        limit = 10 if limit is None else int(limit)
        # If limit set to a negative value all results will be returned
        if limit >= 0:
            authors_diff = authors_diff[:limit]

        self.resolv_name(commits_index, authors_diff)

        return authors_diff
Example #29
0
 def setUpClass(cls):
     """Create the test connector and index three sample commits."""
     monkey_master = 'https://github.com/nakata/monkey.git:monkey:master'
     masked_email = '*****@*****.**'

     cls.con = index.Connector(index='repoxplorertest')
     cls.c = Commits(cls.con)
     cls.t = CommitsAmountTrend(cls.con)

     first = dict(
         sha='3597334f2cb10772950c97ddf2f6cc17b184',
         author_date=1410456005,
         committer_date=1410456010,
         ttl=5,
         author_name='Nakata Daisuke',
         committer_name='Nakata Daisuke',
         author_email=masked_email,
         committer_email=masked_email,
         repos=[monkey_master],
         line_modifieds=10,
         merge_commit=False,
         commit_msg='Add init method',
     )
     second = dict(
         sha='3597334f2cb10772950c97ddf2f6cc17b185',
         author_date=1410457005,
         committer_date=1410457005,
         ttl=0,
         author_name='Keiko Amura',
         committer_name='Keiko Amura',
         author_email=masked_email,
         committer_email=masked_email,
         repos=[monkey_master],
         line_modifieds=100,
         merge_commit=False,
         commit_msg='Merge "Fix sanity unittest"',
     )
     third = dict(
         sha='3597334f2cb10772950c97ddf2f6cc17b186',
         author_date=1410458005,
         committer_date=1410458005,
         ttl=0,
         author_name='Jean Bon',
         committer_name='Jean Bon',
         author_email=masked_email,
         committer_email=masked_email,
         repos=[monkey_master],
         line_modifieds=200,
         merge_commit=False,
         commit_msg='Add request customer feature 19',
     )
     cls.commits = [first, second, third]
     cls.c.add_commits(cls.commits)
Example #30
0
    def commits(self, pid=None, tid=None, cid=None, gid=None,
                dfrom=None, dto=None, inc_merge_commit=None,
                inc_repos=None, metadata=None, exc_groups=None,
                inc_groups=None):
        """Return the commits histogram matching the given filters.

        The result is a list of ``{'date': ..., 'value': ...}`` dicts,
        or an empty list when no commit matches.
        """
        projects_index = Projects()
        idents = Contributors()

        filters = utils.resolv_filters(
            projects_index, idents, pid, tid, cid, gid,
            dfrom, dto, inc_repos, inc_merge_commit,
            metadata, exc_groups, inc_groups)

        commits_index = Commits(index.Connector())
        if not commits_index.get_commits_amount(**filters):
            return []

        # The buckets are the second item returned by get_commits_histo
        buckets = commits_index.get_commits_histo(**filters)[1]
        points = []
        for bucket in buckets:
            points.append({
                'date': bucket['key_as_string'],
                'value': bucket['doc_count'],
            })
        return points
Example #31
0
 def __init__(self, projects, con=None):
     """Keep ``projects`` and build the commits and tags index helpers.

     ``con`` falls back to ``index.Connector()`` when omitted; the tags
     helper uses its own connector on the same index with the ``tags``
     suffix.
     """
     self.con = con if con else index.Connector()
     self.projects = projects
     self.c = Commits(self.con)
     tags_connector = index.Connector(
         index=self.con.index, index_suffix='tags')
     self.t = Tags(tags_connector)
     self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
     self.current_base_ids = set()
Example #32
0
    def metadata(self, key=None, pid=None, tid=None, cid=None, gid=None,
                 dfrom=None, dto=None, inc_merge_commit=None,
                 inc_repos=None, exc_groups=None, inc_groups=None):
        """Return the metadata keys, or the values of ``key`` when one
        is given, for the commits matching the filters.
        """
        commits_index = Commits(index.Connector())
        projects_index = Projects()
        idents = Contributors()

        # No metadata filter is passed to resolv_filters (None) and the
        # resulting entry is dropped before querying.
        filters = utils.resolv_filters(
            projects_index, idents, pid, tid, cid, gid,
            dfrom, dto, inc_repos, inc_merge_commit, None, exc_groups,
            inc_groups)
        del filters['metadata']

        if key:
            return commits_index.get_metadata_key_values(key, **filters)
        return commits_index.get_metadata_keys(**filters)
Example #33
0
    def __init__(self, name, uri, parsers=None,
                 con=None, meta_ref=None):
        """Prepare the indexer of the repository ``name`` hosted at
        ``uri``.

        Creates ``conf.git_store`` and the local repository directory
        when missing, then resolves the git credentials helper: the
        configured ``git_credential_helper_path`` when it is an
        absolute path to an existing file, otherwise the default
        install location under ``sys.prefix``, otherwise the command
        found on the PATH (``None`` when nothing is found).
        """
        helper_name = 'repoxplorer-git-credentials-helper'

        self.con = con if con else index.Connector()
        self.c = Commits(self.con)
        tags_connector = index.Connector(
            index=self.con.index, index_suffix='tags')
        self.t = Tags(tags_connector)
        if not os.path.isdir(conf.git_store):
            os.makedirs(conf.git_store)
        self.name = name
        self.uri = uri
        self.base_id = '%s:%s' % (self.uri, self.name)
        self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
        self.meta_ref = 'meta_ref: %s' % meta_ref if meta_ref else None

        self.parsers = parsers if parsers else []
        self.parsers_compiled = False
        flat_uri = self.uri.replace('/', '_')
        self.local = os.path.join(conf.git_store, self.name, flat_uri)
        if not os.path.isdir(self.local):
            os.makedirs(self.local)

        helper = getattr(conf, 'git_credential_helper_path', None)
        if helper:
            # Only an absolute path to an existing file is accepted
            if not (helper.startswith('/') and os.path.isfile(helper)):
                logger.warning(
                    'Configured git_credential_helper %s not found' % (
                        helper))
                helper = None
        else:
            helper = None
        # Look at the default installation pathes
        if not helper:
            helper = os.path.join(sys.prefix, 'bin', helper_name)
            if not os.path.isfile(helper):
                helper = shutil.which(helper_name)
            if not helper:
                logger.warning(
                    'Default repoxplorer-git-credential-helper command '
                    'not found')
        self.credentials_helper_path = helper
Example #34
0
 def setUpClass(cls):
     """Create the test connector, a one-repository project definition
     and index the shared COMMITS fixture.
     """
     monkey_repo = dict(
         uri='https://github.com/nakata/monkey.git',
         name='monkey',
         branch='master',
     )
     cls.con = index.Connector(index='repoxplorertest')
     cls.projects = {'test': {'repos': [monkey_repo]}}
     cls.c = Commits(cls.con)
     cls.c.add_commits(COMMITS)
Example #35
0
class CommitsAmountTrend(object):
    """Compare the amount of commits of two periods."""

    def __init__(self, connector=None):
        self.ic = Commits(connector)

    def get_trend(self, mails=[], repos=[],
                  period_a=None, period_b=None,
                  merge_commit=None):
        """ Return the amount diff and the percentage of amount
        evolution for period a compared to period b.

        ``period_a`` and ``period_b`` must be tuples whose first two
        items are forwarded to ``get_commits_amount``. ``mails`` and
        ``repos`` are forwarded untouched (they are never mutated
        here). When both periods contain no commit the trend is 0.
        """
        assert isinstance(period_a, tuple)
        assert isinstance(period_b, tuple)
        c_amnt_a = self.ic.get_commits_amount(mails, repos,
                                              period_a[0], period_a[1],
                                              merge_commit)
        c_amnt_b = self.ic.get_commits_amount(mails, repos,
                                              period_b[0], period_b[1],
                                              merge_commit)
        diff = c_amnt_a - c_amnt_b
        reference = c_amnt_a or c_amnt_b
        if not reference:
            # Both amounts are 0: there is no evolution and dividing
            # would raise ZeroDivisionError.
            return diff, 0
        trend = diff * 100 / reference
        return diff, trend
Example #36
0
class CommitsAmountTrend(object):
    """Compute how the amount of commits evolves between two periods."""

    def __init__(self, connector=None):
        self.ic = Commits(connector)

    def get_trend(self,
                  mails=[],
                  repos=[],
                  period_a=None,
                  period_b=None,
                  merge_commit=None):
        """ Return the amount diff and the percentage of amount
        evolution for period a compared to period b.

        Both periods must be tuples; their first two items are passed
        to ``get_commits_amount`` together with ``mails``, ``repos``
        and ``merge_commit`` (none of them is modified here). A trend
        of 0 is returned when neither period contains a commit.
        """
        assert isinstance(period_a, tuple)
        assert isinstance(period_b, tuple)
        c_amnt_a = self.ic.get_commits_amount(mails, repos, period_a[0],
                                              period_a[1], merge_commit)
        c_amnt_b = self.ic.get_commits_amount(mails, repos, period_b[0],
                                              period_b[1], merge_commit)
        diff = c_amnt_a - c_amnt_b
        base = c_amnt_a or c_amnt_b
        # With no commit in either period the division below would
        # raise ZeroDivisionError; report a flat trend instead.
        trend = diff * 100 / base if base else 0
        return diff, trend
Example #37
0
 def __init__(self, name, uri, parsers=None, con=None, config=None):
     """Prepare the indexer of the repository ``name`` hosted at
     ``uri``.

     ``conf.git_store`` and the local repository directory are created
     when they do not exist yet.
     """
     if config:
         configuration.set_config(config)
     self.con = con if con else index.Connector()
     self.c = Commits(self.con)
     self.t = Tags(self.con)
     if not os.path.isdir(conf.git_store):
         os.makedirs(conf.git_store)
     self.name = name
     self.uri = uri
     self.base_id = '%s:%s' % (self.uri, self.name)
     self.parsers = parsers if parsers else []
     self.parsers_compiled = False
     # Slashes are flattened so the uri becomes a single path component
     flat_uri = self.uri.replace('/', '_')
     self.local = os.path.join(conf.git_store, self.name, flat_uri)
     if not os.path.isdir(self.local):
         os.makedirs(self.local)
Example #38
0
    ret = []
    for i in range(amount):
        author_date = random.randint(
            epoch_start, epoch_start + 1000000)
        author = emails[random.randint(0, email_amount - 1)]
        committer = emails[random.randint(0, email_amount - 1)]
        c = {}
        c['sha'] = hashlib.sha256(create_random_str(10)).hexdigest()
        c['author_name'] = author[0]
        c['committer_name'] = committer[0]
        c['author_email'] = author[1]
        c['committer_email'] = committer[1]
        c['author_date'] = author_date
        c['committer_date'] = random.randint(
            author_date + 1, author_date + 10000)
        c['ttl'] = random.randint(0, 10000)
        c['commit_msg'] = gen_commit_msg()
        c['line_modifieds'] = random.randint(0, 10000)
        c['merge_commit'] = False
        c['projects'] = [project, ]
        ret.append(c)
    print("Generation of %s fake commits done." % amount)
    return ret


if __name__ == '__main__':
    # Script entry point: generate fake commits and push them to the
    # index reached through the default connector.
    amount = 100000
    c = Commits(index.Connector())
    fake_commits = gen_fake_commits(amount)
    c.add_commits(fake_commits)
    print("Indexation done.")
Example #39
0
class RepoIndexer():
    """Index the commits and tags of one branch of a git repository.

    The repository is handled as a bare clone located under
    ``conf.git_store``. ``set_branch`` must be called before any
    method relying on ``self.branch`` / ``self.ref_id``.
    """

    def __init__(self, name, uri, parsers=None, con=None, config=None):
        """
        :param name: repository name.
        :param uri: repository uri, used as the git remote.
        :param parsers: optional list of regex patterns, compiled by
            ``index``.
        :param con: index connector; ``index.Connector()`` when omitted.
        :param config: optional configuration handed to
            ``configuration.set_config``.
        """
        if config:
            configuration.set_config(config)
        if not con:
            self.con = index.Connector()
        else:
            self.con = con
        self.c = Commits(self.con)
        self.t = Tags(self.con)
        if not os.path.isdir(conf.git_store):
            os.makedirs(conf.git_store)
        self.name = name
        self.uri = uri
        self.base_id = '%s:%s' % (self.uri, self.name)
        self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
        if not parsers:
            self.parsers = []
        else:
            self.parsers = parsers
        self.parsers_compiled = False
        self.local = os.path.join(conf.git_store, self.name,
                                  self.uri.replace('/', '_'))
        if not os.path.isdir(self.local):
            os.makedirs(self.local)
        self.credentials_helper_path = os.path.join(
            sys.prefix, 'bin', 'repoxplorer-git-credentials-helper')

    def __str__(self):
        # ref_id only exists once set_branch has been called; fall back
        # to the repository id instead of raising AttributeError.
        return 'Git indexer of %s' % getattr(self, 'ref_id', self.base_id)

    def save_seen_ref_in_cache(self):
        """Add ``self.ref_id`` to the seen refs pickle file."""
        # Keep a cache a each ref that have been indexed
        # This is use later to discover seen refs no longer in projects.yaml
        # In that case a removal from the backend will be performed
        logger.debug("Save ref %s into seen_refs file" % self.ref_id)
        if not os.path.isfile(self.seen_refs_path):
            data = set()
        else:
            try:
                # NOTE: pickle must only be fed trusted data; this file
                # is expected to be the cache dumped below.
                with open(self.seen_refs_path, 'rb') as seen_refs_file:
                    data = cPickle.load(seen_refs_file)
            except Exception:
                # Protect against corrupted file
                data = set()
        data.add(self.ref_id)
        # The context manager guarantees the dump is flushed and the
        # file descriptor released.
        with open(self.seen_refs_path, 'wb') as seen_refs_file:
            cPickle.dump(data, seen_refs_file)

    def set_branch(self, branch):
        """Select the branch to index and record its ref as seen."""
        self.branch = branch
        self.ref_id = '%s:%s:%s' % (self.uri, self.name, self.branch)
        self.save_seen_ref_in_cache()

    def git_init(self):
        """Initialize the bare local repository and its origin remote."""
        logger.debug("Git init for %s:%s in %s" %
                     (self.uri, self.name, self.local))
        run(["git", "init", "--bare", "."], self.local)
        if "origin" not in run(["git", "remote", "-v"], self.local):
            run(["git", "remote", "add", "origin", self.uri], self.local)

    def git_fetch_branch(self):
        """Fetch the selected branch from origin."""
        logger.debug("Fetch %s %s:%s" % (self.name, self.uri, self.branch))
        run([
            "git", "-c",
            "credential.helper=%s" % self.credentials_helper_path, "fetch",
            "-nk", "origin",
            "+%s:%s" % (self.branch, self.branch)
        ], self.local)

    def get_refs(self):
        """Store in ``self.refs`` the tab-split lines of
        ``git ls-remote origin``.
        """
        refs = run([
            "git", "-c",
            "credential.helper=%s" % self.credentials_helper_path, "ls-remote",
            "origin"
        ], self.local).splitlines()
        self.refs = [r.split('\t') for r in refs]

    def get_heads(self):
        """Store in ``self.heads`` the refs located under refs/heads/."""
        # A real list (not a lazy filter object) because the result is
        # iterated several times by the other methods.
        self.heads = [
            ref for ref in self.refs if ref[1].startswith('refs/heads/')
        ]

    def get_tags(self):
        """Store in ``self.tags`` the refs located under refs/tags/."""
        # A real list: index_tags calls len() on it and walks it more
        # than once.
        self.tags = [
            ref for ref in self.refs if ref[1].startswith('refs/tags/')
        ]

    def git_get_commit_obj(self):
        """Store in ``self.commits`` the result of ``get_all_shas``."""
        self.commits = get_all_shas(self.local)

    def run_workers(self, shas, workers):
        """Process ``shas`` by chunks through a pool of workers.

        :param shas: list of commit shas; it is left untouched.
        :param workers: pool size, 0 meaning cpu count - 1 (at least 1).
        """
        BULK_CHUNK = 1000
        if workers == 0:
            # Default value (auto)
            workers = mp.cpu_count() - 1 or 1
        to_process = [
            shas[start:start + BULK_CHUNK]
            for start in range(0, len(shas), BULK_CHUNK)
        ]
        options = [(self.local, self.ref_id, stp) for stp in to_process]
        worker_pool = mp.Pool(workers)
        try:
            worker_pool.map(process_commits, options)
        finally:
            # Always release the worker processes, even on failure
            worker_pool.terminate()
            worker_pool.join()

    def is_branch_fully_indexed(self):
        """Tell whether the tip of the selected branch is already
        indexed for this ref.

        Raises ``IndexError`` when no head matches the branch.
        """
        # NOTE(review): endswith also matches a head whose name merely
        # ends with the branch name (e.g. "old-master" for "master");
        # confirm whether an exact match is wanted.
        branch = [
            head for head in self.heads if head[1].endswith(self.branch)
        ][0]
        branch_tip_sha = branch[0]
        cmt = self.c.get_commit(branch_tip_sha, silent=True)
        if cmt and self.ref_id in cmt['repos']:
            return True
        return False

    def get_current_commit_indexed(self):
        """ Fetch from the index commits mentionned for this repo
        and branch.
        """
        self.already_indexed = [
            c['_id']
            for c in self.c.get_commits(repos=[self.ref_id], scan=True)
        ]
        logger.debug(
            "%s: In the DB - repo history is composed of %s commits." %
            (self.name, len(self.already_indexed)))

    def compute_to_index_to_delete(self):
        """ Compute the list of commits (sha) to index and the
        list to delete from the index.
        """
        logger.debug("%s: Upstream - repo history is composed of %s commits." %
                     (self.name, len(self.commits)))
        upstream = set(self.commits)
        indexed = set(self.already_indexed)
        self.to_delete = indexed - upstream
        self.to_index = upstream - indexed
        logger.debug("%s: Indexer will reference %s commits." %
                     (self.name, len(self.to_index)))
        logger.debug("%s: Indexer will dereference %s commits." %
                     (self.name, len(self.to_delete)))

    def compute_to_create_to_update(self):
        """Split ``self.to_index`` between the shas to create and the
        already indexed commit documents to update.

        :return: ``(to_create, to_update)``; two empty lists when there
            is nothing to index.
        """
        if self.to_index:
            res = self.c.get_commits_by_id(list(self.to_index))
            to_update = [
                c['_source'] for c in res['docs'] if c['found'] is True
            ]
            to_create = [c['_id'] for c in res['docs'] if c['found'] is False]
            return to_create, to_update
        return [], []

    def index_tags(self):
        """Index the upstream tags whose commit is indexed and delete
        the indexed tags that no longer exist upstream.
        """
        def c_tid(t):
            return "%s%s%s" % (t['sha'], t['name'].replace('refs/tags/',
                                                           ''), t['repo'])

        if not self.tags:
            logger.debug('%s: no tags detected for this repository' %
                         (self.name))
            return
        logger.debug('%s: %s tags exist upstream' %
                     (self.name, len(self.tags)))
        tags = self.t.get_tags([self.base_id])
        existing = {c_tid(t['_source']): t['_id'] for t in tags}
        logger.debug('%s: %s tags already referenced' %
                     (self.name, len(existing)))
        # Some commits may be not found because it is possible the branches
        # has not been indexed.
        commits = [
            c['_source']
            for c in self.c.get_commits_by_id([t[0]
                                               for t in self.tags])['docs']
            if c['found']
        ]
        lookup = {c['sha']: c['committer_date'] for c in commits}
        # Built once so each existing tag is checked with a set lookup
        upstream_tids = set(
            "%s%s%s" %
            (sha, name.replace('refs/tags/', '').replace('^{}', ''),
             self.base_id) for sha, name in self.tags)
        to_delete = [
            v for k, v in existing.items() if k not in upstream_tids
        ]
        docs = []
        for sha, name in self.tags:
            if sha in lookup:
                doc = {}
                doc['name'] = name.replace('refs/tags/', '').replace('^{}', '')
                doc['sha'] = sha
                doc['date'] = lookup[sha]
                doc['repo'] = self.base_id
                if c_tid(doc) in existing:
                    continue
                docs.append(doc)
        if docs:
            logger.info('%s: %s tags will be indexed' % (self.name, len(docs)))
            self.t.add_tags(docs)
        if to_delete:
            logger.info('%s: %s tags will be deleted' %
                        (self.name, len(to_delete)))
            self.t.del_tags(to_delete)

    def index(self, extract_workers=1):
        """Apply the computed changes: dereference ``self.to_delete``,
        then create or update the commits of ``self.to_index``.

        :param extract_workers: amount of workers handed to
            ``run_workers``.
        """
        # Compile the parsers
        if self.parsers and not self.parsers_compiled:
            self.parsers = [re.compile(parser) for parser in self.parsers]
            logger.debug("%s: Prepared %s regex parsers for commit msgs" %
                         (self.name, len(self.parsers)))
            self.parsers_compiled = True

        # check whether a commit should be completly deleted or
        # updated by removing the repo from the repos field
        if self.to_delete:
            delete_commits(self.c, self.name, self.to_delete, self.ref_id)

        # check whether a commit should be created or
        # updated by adding the repo into the repos field
        if self.to_index:
            to_create, to_update = self.compute_to_create_to_update()

            if to_create:
                logger.info("%s: %s commits will be created ..." %
                            (self.name, len(to_create)))
                self.run_workers(to_create, extract_workers)

            if to_update:
                logger.info(
                    "%s: %s commits already indexed and need to be updated" %
                    (self.name, len(to_update)))
                for c in to_update:
                    c['repos'].append(self.ref_id)
                self.c.update_commits(to_update)
Example #40
0
class RefsCleaner():
    """Remove from the index the commits and tags of refs that are no
    longer declared in the projects definition.

    The refs already seen by the indexer are read from (and written
    back to) the pickle file located at ``self.seen_refs_path``.
    """

    def __init__(self, projects, con=None):
        """
        :param projects: object exposing ``get_projects(source=...)``.
        :param con: index connector; ``index.Connector()`` when omitted.
        """
        if not con:
            self.con = index.Connector()
        else:
            self.con = con
        self.projects = projects
        self.c = Commits(self.con)
        self.t = Tags(index.Connector(
            index=self.con.index, index_suffix='tags'))
        self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
        self.current_base_ids = set()

    def find_refs_to_clean(self):
        """Return the set of seen refs that are not declared anymore.

        As a side effect ``self.current_base_ids`` collects the
        ``shortrid`` of every declared ref and ``self.data`` is set to
        the content of the seen refs file (an empty set when the file
        is missing or unreadable).
        """
        projects = self.projects.get_projects(source=['refs'])
        refs_ids = set()
        for project in projects.values():
            for ref in project['refs']:
                self.current_base_ids.add(ref['shortrid'])
                refs_ids.add(ref['fullrid'])
        if not os.path.isfile(self.seen_refs_path):
            self.data = set()
        else:
            try:
                # NOTE: pickle must only be fed trusted data; this file
                # is expected to be the cache dumped by
                # remove_from_seen_refs.
                with open(self.seen_refs_path, 'rb') as seen_refs_file:
                    self.data = pickle.load(seen_refs_file)
            except Exception:
                # Protect against corrupted file
                self.data = set()
        refs_to_clean = self.data - refs_ids
        if refs_to_clean:
            logger.info("Found %s refs to clean." % len(refs_to_clean))
        return refs_to_clean

    def clean_tags(self, base_id):
        """Delete every indexed tag of the repository ``base_id``."""
        # Tags are indexed by repos (base_id) not by ref (ref_id)
        tags = self.t.get_tags([base_id])
        ids = [t['_id'] for t in tags]
        if ids:
            logger.info("Repo %s no longer referenced. Cleaning %s tags" % (
                base_id, len(ids)))
            self.t.del_tags(ids)

    def clean_ref_cmts(self, ref):
        """Delete the commits attached to ``ref``; when there is none
        the ref is directly dropped from the seen refs.
        """
        # Find ref's Commits
        ids = [c['_id'] for c in
               self.c.get_commits(repos=[ref], scan=True)]
        if not ids:
            self.remove_from_seen_refs(ref)
            return
        logger.info("Ref %s no longer referenced. Cleaning %s cmts." %
                    (ref, len(ids)))
        # Do it by bulk of 10000 to not hurt memory
        bulk = 10000
        for start in range(0, len(ids), bulk):
            delete_commits(self.c, ref, ids[start:start + bulk], ref)

    def clean(self, refs):
        """Clean the commits of each ref of ``refs``, then the tags of
        the repositories that have no declared ref left.
        """
        base_ids = set()
        for ref in refs:
            self.clean_ref_cmts(ref)
            self.remove_from_seen_refs(ref)
            # Strip only the trailing ":<branch>" component; a plain
            # replace would also eat a repository name equal to the
            # branch name.
            base_id = ref.rsplit(':', 1)[0]
            if base_id not in self.current_base_ids:
                base_ids.add(base_id)
        for base_id in base_ids:
            self.clean_tags(base_id)

    def remove_from_seen_refs(self, ref_id):
        """Drop ``ref_id`` from ``self.data`` and persist the result.

        Removing a ref that is already gone is a no-op: ``clean`` calls
        this after ``clean_ref_cmts``, which may have removed the ref
        itself.
        """
        # Remove from the struct to be dumped
        self.data.discard(ref_id)
        with open(self.seen_refs_path, 'wb') as seen_refs_file:
            pickle.dump(self.data, seen_refs_file)
Example #41
0
 def setUpClass(cls):
     """Create the test connector and index eight sample commits
     spread over three refs.
     """
     sha_prefix = '3597334f2cb10772950c97ddf2f6cc17b'
     masked_email = '*****@*****.**'
     monkey_master = 'https://github.com/nakata/monkey.git:monkey:master'
     kotatsu_master = 'https://github.com/amura/kotatsu.git:kotatsu:master'
     kotatsu_devel = 'https://github.com/amura/kotatsu.git:kotatsu:devel'

     def fake_commit(sha_tail, author_date, who, repos, lines, msg,
                     files=None, domain='joker.org', merge=False,
                     delay=0, extra=None):
         # Assemble one commit document: the committer mirrors the
         # author and commits ``delay`` seconds after the author date.
         doc = {
             'sha': sha_prefix + sha_tail,
             'author_date': author_date,
             'committer_date': author_date + delay,
             'ttl': delay,
             'author_name': who,
             'committer_name': who,
             'author_email': masked_email,
             'author_email_domain': domain,
             'committer_email': masked_email,
             'repos': list(repos),
             'line_modifieds': lines,
             'merge_commit': merge,
             'commit_msg': msg,
         }
         doc.update(extra or {})
         doc['files_list'] = list(files or [])
         return doc

     cls.con = index.Connector(index='repoxplorertest')
     cls.c = Commits(cls.con)
     cls.commits = [
         fake_commit(
             '184', 1410456005, 'Nakata Daisuke', [monkey_master], 10,
             'Add init method',
             delay=5,
             extra={'implement-partial-epic': ['Great Feature']},
             files=[
                 'ichi/',
                 'ichi/ni/kuruma.sh',
                 'ichi/ni/san/',
                 'ichi/ni/san/tamago.txt']),
         fake_commit(
             '185', 1410457005, 'Keiko Amura', [kotatsu_master], 100,
             'Merge "Fix sanity unittest"',
             domain='hanabi.org',
             merge=True),
         fake_commit(
             '186', 1410458005, 'Jean Bon', [monkey_master], 200,
             'Add request customer feature 19',
             extra={'implement-feature': ['19']},
             files=[
                 'monkey/',
                 'monkey/__init__.py',
                 'ichi/',
                 'ichi/ni/hikoki.asm']),
         fake_commit(
             '187', 1410459005, 'Jean Bon', [monkey_master], 300,
             'Add request customer feature 20',
             extra={'implement-feature': ['20'],
                    'implement-partial-epic': ['Great Feature']}),
         fake_commit(
             '188', 1410460005, 'Jean Bon',
             [kotatsu_master, kotatsu_devel], 400,
             'Add request customer feature 21'),
         fake_commit(
             '189', 1410461005, 'Jean Bon', [kotatsu_devel], 400,
             'Add request customer feature 22'),
         fake_commit(
             '190', 1410491005, 'Jean Bon', [kotatsu_devel], 400,
             'Add request customer feature 23'),
         fake_commit(
             '191', 46400, 'Marty Junior', [kotatsu_devel], 400,
             'Add request customer feature 23'),
     ]
     cls.c.add_commits(cls.commits)
Example #42
0
class RefsCleaner():
    """Clean indexed data of refs that are no longer part of the projects.

    The refs known to have been indexed are read from the pickled "seen
    refs" cache file (``self.seen_refs_path``). Refs found in that cache but
    absent from the current projects definition get their commits cleaned,
    and the tags of their repository are deleted when no remaining ref uses
    that repository.
    """

    def __init__(self, projects, con=None):
        """Prepare the index accessors used by the cleaner.

        :param projects: object exposing ``get_projects(source=[...])``.
        :param con: index connector; a default ``index.Connector()`` is
            created when it is not provided.
        """
        if not con:
            self.con = index.Connector()
        else:
            self.con = con
        self.projects = projects
        self.c = Commits(self.con)
        self.t = Tags(
            index.Connector(index=self.con.index, index_suffix='tags'))
        self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
        # Filled by find_refs_to_clean with the 'shortrid' of every ref
        # still referenced by the projects definition.
        self.current_base_ids = set()

    @staticmethod
    def _base_id_from_ref(ref):
        """Return ``ref`` without its last ``:``-separated component.

        Only the trailing component is dropped, so a repository whose name
        equals the branch name keeps its name in the result.
        """
        return ref.rsplit(':', 1)[0]

    def _load_seen_refs(self):
        """Return the set stored in the seen refs file (empty if unusable)."""
        if not os.path.isfile(self.seen_refs_path):
            return set()
        try:
            # NOTE(review): pickle must only be used on trusted data; this
            # assumes the cache file is only ever written by this code.
            with open(self.seen_refs_path, 'rb') as seen_refs_file:
                return pickle.load(seen_refs_file)
        except Exception:
            # Protect against corrupted file
            return set()

    def find_refs_to_clean(self):
        """Return the seen refs that are no longer in the projects definition.

        As a side effect ``self.current_base_ids`` is filled and the seen
        refs cache is loaded into ``self.data``.
        """
        projects = self.projects.get_projects(source=['refs'])
        refs_ids = set()
        for project in projects.values():
            for ref in project['refs']:
                self.current_base_ids.add(ref['shortrid'])
                refs_ids.add(ref['fullrid'])
        self.data = self._load_seen_refs()
        refs_to_clean = self.data - refs_ids
        if refs_to_clean:
            logger.info("Found %s refs to clean.", len(refs_to_clean))
        return refs_to_clean

    def clean_tags(self, base_id):
        """Delete every tag indexed for the repository ``base_id``."""
        # Tags are indexed by repos (base_id) not by ref (ref_id)
        tags = self.t.get_tags([base_id])
        ids = [t['_id'] for t in tags]
        if ids:
            logger.info("Repo %s no longer referenced. Cleaning %s tags",
                        base_id, len(ids))
            self.t.del_tags(ids)

    def clean_ref_cmts(self, ref):
        """Clean the commits indexed for ``ref``.

        When no commit is attached to the ref, the ref is simply removed
        from the seen refs cache.
        """
        # Find ref's Commits
        ids = [c['_id'] for c in self.c.get_commits(repos=[ref], scan=True)]
        if not ids:
            self.remove_from_seen_refs(ref)
            return
        logger.info("Ref %s no longer referenced. Cleaning %s cmts.",
                    ref, len(ids))
        # Do it by bulk of 10000 to not hurt memory
        bulk = 10000
        for start in range(0, len(ids), bulk):
            delete_commits(self.c, ref, ids[start:start + bulk], ref)

    def clean(self, refs):
        """Clean the commits of ``refs`` then the tags of orphaned repos.

        :param refs: iterable of ref ids, as returned by
            ``find_refs_to_clean``.
        """
        base_ids = set()
        for ref in refs:
            self.clean_ref_cmts(ref)
            # clean_ref_cmts may already have dropped the ref from the
            # cache; remove_from_seen_refs tolerates that.
            self.remove_from_seen_refs(ref)
            base_id = self._base_id_from_ref(ref)
            if base_id not in self.current_base_ids:
                base_ids.add(base_id)
        for base_id in base_ids:
            self.clean_tags(base_id)

    def remove_from_seen_refs(self, ref_id):
        """Drop ``ref_id`` from the seen refs and persist the new set.

        Removing a ref that is not (or no longer) in the set is a no-op.
        """
        if ref_id not in self.data:
            return
        # Remove from the struct to be dumped
        self.data.remove(ref_id)
        with open(self.seen_refs_path, 'wb') as seen_refs_file:
            pickle.dump(self.data, seen_refs_file)
Example #43
0
class RepoIndexer():
    """Index the commits and the tags of a Git repository branch.

    The repository is kept as a bare local clone under ``conf.git_store``.
    The expected call sequence, as far as this class shows, is: pick a
    branch with ``set_branch``, collect upstream and indexed commits, call
    ``compute_to_index_to_delete`` and finally ``index``.
    """

    def __init__(self, name, uri, parsers=None,
                 con=None, meta_ref=None):
        """Prepare the index accessors and the local Git storage.

        :param name: name of the repository.
        :param uri: URI of the Git remote.
        :param parsers: optional list of regex patterns, compiled by
            ``index``.
        :param con: index connector; a default ``index.Connector()`` is
            created when it is not provided.
        :param meta_ref: optional extra reference attached to newly created
            commits (stored as ``'meta_ref: <meta_ref>'``).
        """
        if not con:
            self.con = index.Connector()
        else:
            self.con = con
        self.c = Commits(self.con)
        self.t = Tags(index.Connector(
            index=self.con.index, index_suffix='tags'))
        if not os.path.isdir(conf.git_store):
            os.makedirs(conf.git_store)
        self.name = name
        self.uri = uri
        self.base_id = '%s:%s' % (self.uri, self.name)
        self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
        if meta_ref:
            self.meta_ref = 'meta_ref: %s' % meta_ref
        else:
            self.meta_ref = None

        if not parsers:
            self.parsers = []
        else:
            self.parsers = parsers
        self.parsers_compiled = False
        self.local = os.path.join(conf.git_store,
                                  self.name,
                                  self.uri.replace('/', '_'))
        if not os.path.isdir(self.local):
            os.makedirs(self.local)

        # Only an absolute path to an existing file is accepted as the
        # configured credentials helper.
        self.credentials_helper_path = getattr(
            conf, 'git_credential_helper_path', None)
        if not (self.credentials_helper_path and
                self.credentials_helper_path.startswith('/') and
                os.path.isfile(self.credentials_helper_path)):
            if self.credentials_helper_path:
                logger.warning(
                    'Configured git_credential_helper %s not found',
                    self.credentials_helper_path)
            self.credentials_helper_path = None
        # Look at the default installation paths
        if not self.credentials_helper_path:
            self.credentials_helper_path = os.path.join(
                sys.prefix, 'bin', 'repoxplorer-git-credentials-helper')
            if not os.path.isfile(self.credentials_helper_path):
                self.credentials_helper_path = shutil.which(
                    'repoxplorer-git-credentials-helper')
            if not self.credentials_helper_path:
                logger.warning(
                    'Default repoxplorer-git-credential-helper command '
                    'not found')

    def __str__(self):
        # ref_id only exists once set_branch has been called; fall back to
        # the repository id so str() never raises.
        return 'Git indexer of %s' % getattr(self, 'ref_id', self.base_id)

    def save_seen_ref_in_cache(self):
        """Add the current ref to the pickled seen refs file."""
        # Keep a cache of each ref that has been indexed
        # This is used later to discover seen refs no longer in projects.yaml
        # In that case a removal from the backend will be performed
        logger.debug("Save ref %s into seen_refs file", self.ref_id)
        if not os.path.isfile(self.seen_refs_path):
            data = set()
        else:
            try:
                # NOTE(review): pickle must only be used on trusted data;
                # this assumes the file is only written by this code.
                with open(self.seen_refs_path, 'rb') as seen_refs_file:
                    data = pickle.load(seen_refs_file)
            except Exception:
                # Protect against corrupted file
                data = set()
        data.add(self.ref_id)
        with open(self.seen_refs_path, 'wb') as seen_refs_file:
            pickle.dump(data, seen_refs_file)

    def set_branch(self, branch):
        """Select the branch to work on and record its ref as seen."""
        self.branch = branch
        self.ref_id = '%s:%s:%s' % (self.uri, self.name, self.branch)
        self.save_seen_ref_in_cache()

    def git_init(self):
        """Init the bare local repository and make sure 'origin' exists."""
        logger.debug("Git init for %s:%s in %s",
                     self.uri, self.name, self.local)
        run(["git", "init", "--bare", "."], self.local)
        remotes = run(["git", "remote", "-v"], self.local)
        remote_names = [line.split()[0] for line in remotes.splitlines()]
        if "origin" not in remote_names:
            run(["git", "remote", "add", "origin", self.uri], self.local)

    def git_fetch_branch(self):
        """Fetch the selected branch from 'origin' into the local clone."""
        logger.debug("Fetch %s %s:%s", self.name, self.uri, self.branch)
        run(["git", "-c",
             "credential.helper=%s" % self.credentials_helper_path,
             "fetch", "-nk", "origin", "+%s:%s" % (self.branch, self.branch)],
            self.local)

    def get_refs(self):
        """Store in ``self.refs`` the tab-split lines of 'git ls-remote'."""
        refs = run([
            "git", "-c", "credential.helper=%s" % self.credentials_helper_path,
            "ls-remote", "origin"], self.local).splitlines()
        self.refs = [r.split('\t') for r in refs]

    def get_heads(self):
        """Store in ``self.heads`` the refs located under refs/heads/."""
        self.heads = [x for x in self.refs if x[1].startswith('refs/heads/')]

    def get_tags(self):
        """Store in ``self.tags`` the refs located under refs/tags/."""
        self.tags = [x for x in self.refs if x[1].startswith('refs/tags/')]

    def git_get_commit_obj(self):
        """Store in ``self.commits`` the result of ``get_all_shas``."""
        self.commits = get_all_shas(self.local)

    def run_workers(self, shas, workers):
        """Dispatch ``shas`` by chunks to a pool of worker processes.

        :param shas: list of commit shas; it is left unmodified.
        :param workers: size of the pool, 0 meaning one worker per CPU
            minus one (at least one).
        """
        BULK_CHUNK = 1000
        if not shas:
            # Nothing to process, do not spawn a pool for an empty job
            return
        if workers == 0:
            # Default value (auto)
            workers = mp.cpu_count() - 1 or 1
        # Slice rather than delete from shas to keep the caller's list intact
        to_process = [shas[start:start + BULK_CHUNK]
                      for start in range(0, len(shas), BULK_CHUNK)]
        ref_ids = [self.ref_id]
        if self.meta_ref:
            ref_ids.append(self.meta_ref)
        options = [
            (self.local, ref_ids, stp) for stp in to_process]
        worker_pool = mp.Pool(workers)
        try:
            worker_pool.map(process_commits, options)
        finally:
            # Always release the worker processes, even if a job failed
            worker_pool.terminate()
            worker_pool.join()

    def is_branch_fully_indexed(self):
        """Tell whether the indexed tip matches the upstream branch tip.

        :raises IndexError: when the branch is not among ``self.heads``.
        """
        # Prefer the exact head name: a suffix match alone would let branch
        # 'dev' pick 'refs/heads/feature-dev'.
        matching = [head for head in self.heads
                    if head[1] == 'refs/heads/%s' % self.branch]
        if not matching:
            matching = [head for head in self.heads
                        if head[1].endswith(self.branch)]
        if not matching:
            raise IndexError(
                'Branch %s not found among the remote heads' % self.branch)
        branch_tip_sha = matching[0][0]
        _, _, cmts_list = self.c.get_commits(repos=[self.ref_id], limit=1)
        if not cmts_list:
            return False
        return branch_tip_sha == cmts_list[0]['sha']

    def get_current_commits_indexed(self):
        """ Fetch from the index commits mentioned for this repo
        and branch.
        """
        self.already_indexed = [c['_id'] for c in
                                self.c.get_commits(repos=[self.ref_id],
                                                   scan=True)]
        logger.debug(
            "%s: In the DB - repo history is composed of %s commits.",
            self.name, len(self.already_indexed))

    def compute_to_index_to_delete(self):
        """ Compute the list of commits (sha) to index and the
        list to delete from the index.
        """
        logger.debug(
            "%s: Upstream - repo history is composed of %s commits.",
            self.name, len(self.commits))
        indexed = set(self.already_indexed)
        upstream = set(self.commits)
        self.to_delete = indexed - upstream
        self.to_index = upstream - indexed
        logger.debug(
            "%s: Indexer will reference %s commits.",
            self.name, len(self.to_index))
        logger.debug(
            "%s: Indexer will dereference %s commits.",
            self.name, len(self.to_delete))

    def compute_to_create_to_update(self):
        """Split ``self.to_index`` between ids to create and docs to update.

        :return: ``(to_create, to_update)`` where ``to_create`` lists the
            ids not found in the index and ``to_update`` the ``_source`` of
            the ones found.
        """
        if not self.to_index:
            return [], []
        res = self.c.get_commits_by_id(list(self.to_index))
        to_update = [c['_source'] for
                     c in res['docs'] if c['found'] is True]
        to_create = [c['_id'] for
                     c in res['docs'] if c['found'] is False]
        return to_create, to_update

    def index_tags(self):
        """Index the upstream tags and delete the ones no longer upstream."""
        def tag_name(ref_name):
            return ref_name.replace('refs/tags/', '').replace('^{}', '')

        def c_tid(t):
            return "%s%s%s" % (t['sha'],
                               t['name'].replace('refs/tags/', ''),
                               t['repo'])
        if not self.tags:
            logger.debug('%s: no tags detected for this repository',
                         self.name)
            return
        logger.debug('%s: %s tags exist upstream',
                     self.name, len(self.tags))
        tags = self.t.get_tags([self.base_id])
        existing = dict([(c_tid(t['_source']), t['_id']) for t in tags])
        logger.debug('%s: %s tags already referenced',
                     self.name, len(existing))
        # Some commits may be not found because it is possible the branches
        # has not been indexed.
        commits = [c['_source'] for c in self.c.get_commits_by_id(
                   [t[0] for t in self.tags])['docs'] if c['found']]
        lookup = dict([(c['sha'], c['committer_date']) for c in commits])
        # Built once, as a set, instead of once per existing tag
        upstream_ids = set(
            "%s%s%s" % (sha, tag_name(name), self.base_id)
            for sha, name in self.tags)
        to_delete = [v for k, v in existing.items() if
                     k not in upstream_ids]
        docs = []
        for sha, name in self.tags:
            if sha in lookup:
                doc = {}
                doc['name'] = tag_name(name)
                doc['sha'] = sha
                doc['date'] = lookup[sha]
                doc['repo'] = self.base_id
                if c_tid(doc) in existing:
                    continue
                docs.append(doc)
        if docs:
            logger.info('%s: %s tags will be indexed',
                        self.name, len(docs))
            self.t.add_tags(docs)
        if to_delete:
            logger.info('%s: %s tags will be deleted',
                        self.name, len(to_delete))
            self.t.del_tags(to_delete)

    def index(self, extract_workers=1):
        """Apply the computed deletions, creations and updates to the index.

        ``compute_to_index_to_delete`` must have been called before, as
        ``self.to_delete`` and ``self.to_index`` are read here.

        :param extract_workers: pool size given to ``run_workers``.
        """
        # Compile the parsers
        if self.parsers and not self.parsers_compiled:
            # Built as a new list so a failing pattern does not leave
            # self.parsers half compiled
            self.parsers = [re.compile(parser) for parser in self.parsers]
            logger.debug(
                "%s: Prepared %s regex parsers for commit msgs",
                self.name, len(self.parsers))
            self.parsers_compiled = True

        # check whether a commit should be completely deleted or
        # updated by removing the repo from the repos field
        if self.to_delete:
            delete_commits(self.c, self.name, self.to_delete, self.ref_id)

        # check whether a commit should be created or
        # updated by adding the repo into the repos field
        if self.to_index:
            to_create, to_update = self.compute_to_create_to_update()

            if to_create:
                logger.info("%s: %s commits will be created ...",
                            self.name, len(to_create))
                self.run_workers(to_create, extract_workers)

            if to_update:
                logger.info(
                    "%s: %s commits already indexed and need to be updated",
                    self.name, len(to_update))
                for c in to_update:
                    c['repos'].append(self.ref_id)
                self.c.update_commits(to_update)
Example #44
0
 def setUpClass(cls):
     """Load the COMMITS fixture into the 'repoxplorertest' index."""
     connector = index.Connector(index='repoxplorertest')
     cls.con = connector
     commits_index = Commits(connector)
     cls.c = commits_index
     commits_index.add_commits(COMMITS)
Example #45
0
    def index(self,
              prefix=None,
              nameonly='false',
              withstats='false',
              pid=None,
              dfrom=None,
              dto=None,
              inc_merge_commit=None):
        """Return the contributors groups, optionally filtered by prefix.

        :param prefix: only keep groups whose name starts with this prefix;
            the comparison is case-insensitive.
        :param nameonly: when ``'true'`` return a dict mapping the group
            names to ``None``.
        :param withstats: when ``'true'`` add ``repos_amount`` and
            ``projects_amount`` to each group.
        :param pid: passed to ``utils.resolv_filters`` (stats only).
        :param dfrom: passed to ``utils.resolv_filters`` (stats only).
        :param dto: passed to ``utils.resolv_filters`` (stats only).
        :param inc_merge_commit: passed to ``utils.resolv_filters``
            (stats only).
        :return: dict indexed by group name.
        """
        ci = Commits(index.Connector())
        contributors_index = Contributors()
        groups = contributors_index.get_groups()
        # Lowercased once so both listing modes filter the same way
        lowered_prefix = prefix.lower() if prefix else None
        if withstats == 'true':
            projects_index = Projects()
        if nameonly == 'true':
            names = groups.keys()
            if lowered_prefix:
                names = [name for name in names
                         if name.lower().startswith(lowered_prefix)]
            return dict.fromkeys(names)
        ret_groups = {}
        for group, data in groups.items():
            if lowered_prefix and not group.lower().startswith(
                    lowered_prefix):
                continue
            rg = {
                'members': {},
                'description': data.get('description', ''),
                'domains': data.get('domains', [])
            }
            emails = list(data['emails'].keys())
            members = contributors_index.get_idents_by_emails(emails)
            for member_id, member in members.items():
                member['gravatar'] = hashlib.md5(
                    member['default-email'].encode(
                        errors='ignore')).hexdigest()
                # TODO(fbo): bounces should be a list of bounce
                # Let's deactivate that for now
                # member['bounces'] = bounces
                del member['emails']
                if not member['name']:
                    # Try to find it among commits
                    suggested = ci.get_commits_author_name_by_emails(
                        [member['default-email']])
                    name = suggested.get(member['default-email'], 'Unnamed')
                    member['name'] = name
                del member['default-email']
                # The member id is exposed encrypted, as the key
                rg['members'][utils.encrypt(xorkey, member_id)] = member

            if withstats == 'true':
                # Fetch the number of projects and repos contributed to
                query_kwargs = utils.resolv_filters(
                    projects_index, contributors_index, pid, None, None, group,
                    dfrom, dto, None, inc_merge_commit, None, None, None)

                # References starting with 'meta_ref: ' are not counted
                repos = [
                    r for r in ci.get_repos(**query_kwargs)[1]
                    if not r.startswith('meta_ref: ')
                ]
                projects = utils.get_projects_from_references(
                    projects_index, repos)
                rg['repos_amount'] = len(repos)
                rg['projects_amount'] = len(projects)

            ret_groups[group] = rg

        return ret_groups
Example #46
0
 def __init__(self, connector=None):
     """Build the Commits accessor, stored as ``self.ic``, from connector."""
     commits_index = Commits(connector)
     self.ic = commits_index
Example #47
0
    for i in xrange(amount):
        author_date = random.randint(epoch_start, epoch_start + 1000000)
        author = emails[random.randint(0, email_amount - 1)]
        committer = emails[random.randint(0, email_amount - 1)]
        c = {}
        c['sha'] = hashlib.sha256(create_random_str(10)).hexdigest()
        c['author_name'] = author[0]
        c['committer_name'] = committer[0]
        c['author_email'] = author[1]
        c['committer_email'] = committer[1]
        c['author_date'] = author_date
        c['committer_date'] = random.randint(author_date + 1,
                                             author_date + 10000)
        c['ttl'] = random.randint(0, 10000)
        c['commit_msg'] = gen_commit_msg()
        c['line_modifieds'] = random.randint(0, 10000)
        c['merge_commit'] = False
        c['projects'] = [
            project,
        ]
        ret.append(c)
    print "Generation of %s fake commits done." % amount
    return ret


if __name__ == '__main__':
    # Script entry point: generate fake commits and push them to the index
    # reachable through the default connector.
    amount = 100000
    c = Commits(index.Connector())
    fake_commits = gen_fake_commits(amount)
    c.add_commits(fake_commits)
    print("Indexation done.")