def setUpClass(cls):
    """Index the sample commits and project definitions once per class."""
    cls.con = index.Connector(index='repoxplorertest')
    cls.conp = index.Connector(
        index='repoxplorertest', index_suffix='projects')
    Commits(cls.con).add_commits(COMMITS)
    cls.db = set_projects_definition(cls.conp)
def authors(self, pid=None, tid=None, cid=None, gid=None,
            dfrom=None, dto=None, inc_merge_commit=None,
            inc_repos=None, metadata=None, exc_groups=None,
            inc_groups=None):
    """Histogram of the amount of distinct authors per time bucket.

    Each returned bucket is {'date': <bucket date>, 'value': <amount
    of distinct identities>}.
    """
    projects = Projects()
    contributors = Contributors()
    kwargs = utils.resolv_filters(
        projects, contributors, pid, tid, cid, gid, dfrom, dto,
        inc_repos, inc_merge_commit, metadata, exc_groups, inc_groups)
    commits = Commits(index.Connector())
    if not commits.get_commits_amount(**kwargs):
        # Nothing matched the filters; no buckets to return
        return []
    histo = commits.get_authors_histo(**kwargs)[1]
    for bucket in histo:
        # Collapse raw author emails into declared identities
        grouped = contributors.get_idents_by_emails(
            bucket['authors_email'])
        bucket['value'] = len(grouped)
        bucket['date'] = bucket.pop('key_as_string')
        # Strip the remaining raw aggregation fields from the payload
        for raw_field in ('authors_email', 'doc_count', 'key'):
            del bucket[raw_field]
    return histo
def contributor(self, cid=None):
    """Return summary infos for a contributor.

    :param cid: encrypted contributor id provided by the client
    :raises 404: when cid is missing or cannot be decrypted
    """
    if not cid:
        abort(404, detail="No contributor specified")
    c = Commits(index.Connector())
    idents = Contributors()
    try:
        cid = utils.decrypt(xorkey, cid)
    except Exception:
        abort(404, detail="The cid is incorrectly formated")
    _, ident = idents.get_ident_by_id(cid)
    if not ident:
        # No ident has been declared for that contributor
        # Fall back to an ident synthesized from the email itself
        ident = list(idents.get_idents_by_emails(cid).values())[0]
    mails = ident['emails']
    name = ident['name']
    if not name:
        # Try to recover a display name from the commits index
        raw_names = c.get_commits_author_name_by_emails([cid])
        if cid not in raw_names:
            # TODO: get_commits_author_name_by_emails must
            # support look by committer email too
            name = 'Unnamed'
        else:
            name = raw_names[cid]
    infos = {}
    infos['name'] = name
    infos['mails_amount'] = len(mails)
    # errors='ignore' keeps undecodable bytes from breaking the hash
    infos['gravatar'] = hashlib.md5(
        ident['default-email'].encode(errors='ignore')).hexdigest()
    return infos
def setUpClass(cls):
    # Index the sample commits then declare two projects (test, test2)
    # sharing the same repo through a project template.
    cls.con = index.Connector(index='repoxplorertest')
    cls.conp = index.Connector(index='repoxplorertest',
                               index_suffix='projects')
    c = Commits(cls.con)
    c.add_commits(COMMITS)
    projects_file = """
project-templates:
  default:
    uri: https://github.com/nakata/%(name)s.git
    branches:
    - master
projects:
  test:
    repos:
      monkey:
        template: default
  test2:
    repos:
      monkey:
        template: default
    tags:
    - python
"""
    cls.db = set_projects_definition(cls.conp, projects_file)
def process_commits(options):
    """Worker entry point: extract commit metadata and index it.

    :param options: a (repo_path, ref_id, shas) tuple
    """
    path, ref_id, shas = options
    commits = Commits(index.Connector())
    logger.info("Worker %s started to extract and index %s commits"
                % (mp.current_process(), len(shas)))
    descs = get_commits_desc(path, shas)
    commits.add_commits(process_commits_desc_output(descs, ref_id))
def setUpClass(cls):
    """Populate the test indices with sample commits and projects."""
    cls.con = index.Connector(index='repoxplorertest')
    cls.conp = index.Connector(index='repoxplorertest',
                               index_suffix='projects')
    commits = Commits(cls.con)
    commits.add_commits(COMMITS)
    cls.db = set_projects_definition(cls.conp)
def setUpClass(cls):
    # Prepare commits, project definitions and two tags shared by
    # every test of the class.
    cls.con = index.Connector(index='repoxplorertest')
    cls.conp = index.Connector(
        index='repoxplorertest', index_suffix='projects')
    c = Commits(cls.con)
    c.add_commits(COMMITS)
    cls.db = set_projects_definition(cls.conp)
    t = Tags(index.Connector(
        index='repoxplorertest', index_suffix='tags'))
    # NOTE(review): the two shas differ only by one trailing char;
    # presumably intentional to obtain two distinct tags -- confirm.
    tags = [
        {
            'sha': '3597334f2cb10772950c97ddf2f6cc17b184',
            'date': 1410456005,
            'repo': 'https://github.com/nakata/monkey.git:monkey',
            'name': 'tag1',
        },
        {
            'sha': '3597334f2cb10772950c97ddf2f6cc17b1845',
            'date': 1410456005,
            'repo': 'https://github.com/nakata/monkey.git:monkey',
            'name': 'tag2',
        }]
    t.add_tags(tags)
def setUpClass(cls):
    # Index sample commits and declare two projects (test, test2)
    # built from a shared project template.
    cls.con = index.Connector(index='repoxplorertest')
    cls.conp = index.Connector(
        index='repoxplorertest', index_suffix='projects')
    c = Commits(cls.con)
    c.add_commits(COMMITS)
    projects_file = """
project-templates:
  default:
    uri: https://github.com/nakata/%(name)s.git
    branches:
    - master
projects:
  test:
    repos:
      monkey:
        template: default
  test2:
    repos:
      monkey:
        template: default
    tags:
    - python
"""
    cls.db = set_projects_definition(cls.conp, projects_file)
def metadata(self, key=None, pid=None, tid=None, cid=None, gid=None,
             dfrom=None, dto=None, inc_merge_commit=None,
             inc_repos=None, exc_groups=None, inc_groups=None):
    """List available metadata keys, or the values of a given key."""
    commits = Commits(index.Connector())
    kwargs = utils.resolv_filters(
        Projects(), Contributors(), pid, tid, cid, gid, dfrom, dto,
        inc_repos, inc_merge_commit, None, exc_groups, inc_groups)
    # The metadata filter must not constrain this aggregation
    del kwargs['metadata']
    if key:
        return commits.get_metadata_key_values(key, **kwargs)
    return commits.get_metadata_keys(**kwargs)
def authors(self, pid=None, tid=None, cid=None, gid=None,
            dfrom=None, dto=None, inc_merge_commit=None,
            inc_repos=None, metadata=None, exc_groups=None,
            inc_groups=None):
    """Return the distinct-authors histogram for the given filters."""
    projects_index = Projects()
    idents = Contributors()
    filters = utils.resolv_filters(projects_index, idents,
                                   pid, tid, cid, gid, dfrom, dto,
                                   inc_repos, inc_merge_commit,
                                   metadata, exc_groups, inc_groups)
    c = Commits(index.Connector())
    if not c.get_commits_amount(**filters):
        return []
    buckets = c.get_authors_histo(**filters)[1]
    for b in buckets:
        # Amount of declared identities behind the bucket's emails
        b['value'] = len(idents.get_idents_by_emails(b['authors_email']))
        b['date'] = b['key_as_string']
        # Drop the raw Elasticsearch aggregation fields
        del b['authors_email']
        del b['doc_count']
        del b['key_as_string']
        del b['key']
    return buckets
def __init__(self, name, uri, parsers=None, con=None, config=None):
    # Indexer for a single repository identified by (uri, name).
    if config:
        configuration.set_config(config)
    if not con:
        self.con = index.Connector()
    else:
        self.con = con
    self.c = Commits(self.con)
    self.t = Tags(self.con)
    if not os.path.isdir(conf.git_store):
        os.makedirs(conf.git_store)
    self.name = name
    self.uri = uri
    # Unique id of the repository across all projects
    self.base_id = '%s:%s' % (self.uri, self.name)
    self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
    if not parsers:
        self.parsers = []
    else:
        self.parsers = parsers
    self.parsers_compiled = False
    # Local bare clone path; '/' in the uri is not path-safe
    self.local = os.path.join(conf.git_store, self.name,
                              self.uri.replace('/', '_'))
    if not os.path.isdir(self.local):
        os.makedirs(self.local)
    self.credentials_helper_path = os.path.join(
        sys.prefix, 'bin', 'repoxplorer-git-credentials-helper')
def commits(self, pid=None, tid=None, cid=None, gid=None,
            dfrom=None, dto=None, inc_merge_commit=None,
            inc_repos=None, metadata=None, exc_groups=None,
            inc_groups=None):
    """Return the commits-per-period histogram for the given filters."""
    filters = utils.resolv_filters(Projects(), Contributors(),
                                   pid, tid, cid, gid, dfrom, dto,
                                   inc_repos, inc_merge_commit,
                                   metadata, exc_groups, inc_groups)
    c = Commits(index.Connector())
    if not c.get_commits_amount(**filters):
        return []
    histo = c.get_commits_histo(**filters)
    return [{'date': b['key_as_string'], 'value': b['doc_count']}
            for b in histo[1]]
def commits(self, pid=None, tid=None, cid=None, gid=None,
            start=0, limit=10, dfrom=None, dto=None,
            inc_merge_commit=None, inc_repos=None, metadata=None,
            exc_groups=None, inc_groups=None):
    """Return a page of commits enriched for UI display.

    :param start, limit: pagination window of the result page
    :returns: the raw get_commits response with each commit dict
              mutated in place (gravatar, gitwebs, cid/ccid, ...)
    """
    c = Commits(index.Connector())
    projects_index = Projects()
    idents = Contributors()
    query_kwargs = utils.resolv_filters(
        projects_index, idents, pid, tid, cid, gid, dfrom, dto,
        inc_repos, inc_merge_commit, metadata, exc_groups, inc_groups)
    query_kwargs.update(
        {'start': start, 'limit': limit})
    resp = c.get_commits(**query_kwargs)
    for cmt in resp[2]:
        # Get extra metadata keys
        extra = set(cmt.keys()) - set(PROPERTIES.keys())
        cmt['metadata'] = list(extra)
        # meta_ref entries are synthetic refs, not real repositories
        cmt['repos'] = [r for r in cmt['repos']
                        if not r.startswith('meta_ref: ')]
        # Compute link to access commit diff based on the
        # URL template provided in projects.yaml
        cmt['gitwebs'] = [
            projects_index.get_gitweb_link(r) % {'sha': cmt['sha']}
            for r in cmt['repos']]
        cmt['projects'] = utils.get_projects_from_references(
            projects_index, cmt['repos'])
        # Also remove the URI part
        cmt['repos'] = [":".join(p.split(':')[-2:])
                        for p in cmt['repos']]
        # Request the ident index to fetch author/committer name/email
        for elm in ('author', 'committer'):
            ident = list(idents.get_idents_by_emails(
                cmt['%s_email' % elm]).values())[0]
            cmt['%s_email' % elm] = ident['default-email']
            if ident['name']:
                cmt['%s_name' % elm] = ident['name']
        # Convert the TTL to something human readable
        cmt['ttl'] = str((datetime.fromtimestamp(cmt['ttl']) -
                          datetime.fromtimestamp(0)))
        cmt['author_gravatar'] = \
            hashlib.md5(cmt['author_email'].encode(
                errors='ignore')).hexdigest()
        cmt['committer_gravatar'] = \
            hashlib.md5(cmt['committer_email'].encode(
                errors='ignore')).hexdigest()
        # Truncate long commit messages for display
        if len(cmt['commit_msg']) > 80:
            cmt['commit_msg'] = cmt['commit_msg'][0:76] + '...'
        # Add cid and ccid
        cmt['cid'] = utils.encrypt(xorkey, cmt['author_email'])
        cmt['ccid'] = utils.encrypt(xorkey, cmt['committer_email'])
        # Remove email details
        del cmt['author_email']
        del cmt['committer_email']
    return resp
def index(self, prefix=None, nameonly='false', withstats='false'):
    """Return the declared contributor groups and their members.

    :param prefix: restrict to groups whose name starts with it
    :param nameonly: 'true' to return only the group names
    :param withstats: 'true' to add projects/repos amounts per group
    """
    ci = Commits(index.Connector())
    contributors_index = Contributors()
    groups = contributors_index.get_groups()
    if nameonly == 'true':
        ret = dict([(k, None) for k in groups.keys()])
        if prefix:
            # NOTE(review): prefix is not lowercased here while the
            # full branch below lowercases it -- confirm intended.
            ret = dict([(k, None) for k in ret.keys()
                        if k.lower().startswith(prefix)])
        return ret
    ret_groups = {}
    for group, data in groups.items():
        if prefix and not group.lower().startswith(prefix.lower()):
            continue
        rg = {'members': {},
              'description': data['description'],
              'domains': data.get('domains', [])}
        emails = data['emails'].keys()
        members = contributors_index.get_idents_by_emails(emails)
        for id, member in members.items():
            # Fix: hashlib.md5 requires bytes on Python 3 -- encode
            # the email (ignoring undecodable chars) as done by the
            # other endpoints.
            member['gravatar'] = hashlib.md5(
                member['default-email'].encode(
                    errors='ignore')).hexdigest()
            # TODO(fbo): bounces should be a list of bounce
            # Let's deactivate that for now
            # member['bounces'] = bounces
            del member['emails']
            if not member['name']:
                # Try to find it among commits
                suggested = ci.get_commits_author_name_by_emails(
                    [member['default-email']])
                name = suggested.get(member['default-email'],
                                     'Unknown name')
                member['name'] = name
            del member['default-email']
            rg['members'][utils.encrypt(xorkey, id)] = member
        if withstats == 'true':
            # TODO(fbo): This endpoint needs to handle some filters like
            # dates bounces to return more accurate stats
            # Fetch the number of projects and repos contributed to
            p_filter = {}
            query_kwargs = {
                'mails': data['emails'],
                'merge_commit': False,
                'repos': p_filter,
            }
            projects = Projects()
            tops_ctl = tops.TopProjectsController()
            top_projects = tops_ctl.gbycommits(
                ci, projects, query_kwargs, False)
            top_repos = tops_ctl.gbycommits(
                ci, projects, query_kwargs, True)
            rg['projects_amount'] = len(top_projects)
            rg['repos_amount'] = len(top_repos)
        ret_groups[group] = rg
    return ret_groups
def setUp(self):
    # Feed the test indices with sample commits and project
    # definitions before each test.
    FunctionalTest.setUp(self)
    self.con = index.Connector(index='repoxplorertest')
    self.conp = index.Connector(index='repoxplorertest',
                                index_suffix='projects')
    c = Commits(self.con)
    c.add_commits(COMMITS)
    self.db = set_projects_definition(self.conp)
def setUp(self):
    # Per-test fixture: index sample commits and project definitions.
    FunctionalTest.setUp(self)
    self.con = index.Connector(index='repoxplorertest')
    self.conp = index.Connector(
        index='repoxplorertest', index_suffix='projects')
    c = Commits(self.con)
    c.add_commits(COMMITS)
    self.db = set_projects_definition(self.conp)
def process_commits(options):
    """Worker entry point: index the given shas under one or more refs.

    :param options: a (repo_path, ref_ids, shas) tuple; ref_ids may be
                    a single ref id or a list of them
    """
    path, ref_ids, shas = options
    if not isinstance(ref_ids, list):
        ref_ids = [ref_ids]
    indexer = Commits(index.Connector())
    logger.info("Worker %s started to extract and index %s commits" % (
        mp.current_process(), len(shas)))
    descs = get_commits_desc(path, shas)
    indexer.add_commits(process_commits_desc_output(descs, ref_ids))
def index(self, prefix=None, nameonly='false', withstats='false',
          pid=None, dfrom=None, dto=None, inc_merge_commit=None):
    """Return the declared contributor groups and their members.

    :param prefix: restrict to groups whose name starts with it
    :param nameonly: 'true' to return only the group names
    :param withstats: 'true' to add repos/projects amounts per group
    :param pid, dfrom, dto, inc_merge_commit: filters for the stats
    """
    ci = Commits(index.Connector())
    contributors_index = Contributors()
    groups = contributors_index.get_groups()
    if withstats == 'true':
        projects_index = Projects()
    if nameonly == 'true':
        ret = dict([(k, None) for k in groups.keys()])
        if prefix:
            ret = dict([(k, None) for k in ret.keys()
                        if k.lower().startswith(prefix)])
        return ret
    ret_groups = {}
    for group, data in groups.items():
        if prefix and not group.lower().startswith(prefix.lower()):
            continue
        rg = {'members': {},
              'description': data.get('description', ''),
              'domains': data.get('domains', [])}
        emails = list(data['emails'].keys())
        members = contributors_index.get_idents_by_emails(emails)
        for id, member in members.items():
            # errors='ignore' keeps undecodable bytes from breaking md5
            member['gravatar'] = hashlib.md5(
                member['default-email'].encode(
                    errors='ignore')).hexdigest()
            # TODO(fbo): bounces should be a list of bounce
            # Let's deactivate that for now
            # member['bounces'] = bounces
            del member['emails']
            if not member['name']:
                # Try to find it among commits
                suggested = ci.get_commits_author_name_by_emails(
                    [member['default-email']])
                name = suggested.get(member['default-email'], 'Unnamed')
                member['name'] = name
            del member['default-email']
            rg['members'][utils.encrypt(xorkey, id)] = member
        if withstats == 'true':
            # Fetch the number of projects and repos contributed to
            query_kwargs = utils.resolv_filters(
                projects_index, contributors_index, pid, None, None,
                group, dfrom, dto, None, inc_merge_commit, None, None,
                None)
            # meta_ref entries are synthetic refs, not real repos
            repos = [r for r in ci.get_repos(**query_kwargs)[1]
                     if not r.startswith('meta_ref: ')]
            projects = utils.get_projects_from_references(
                projects_index, repos)
            rg['repos_amount'] = len(repos)
            rg['projects_amount'] = len(projects)
        ret_groups[group] = rg
    return ret_groups
def __init__(self, projects, con=None, config=None):
    """Keep a connector, a Commits accessor and the seen-refs cache path."""
    if config:
        configuration.set_config(config)
    self.con = con if con else index.Connector()
    self.projects = projects
    self.c = Commits(self.con)
    self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
def __init__(self, projects, con=None):
    """Prepare commits/tags accessors and the seen-refs cache path."""
    self.con = con if con else index.Connector()
    self.projects = projects
    self.c = Commits(self.con)
    self.t = Tags(self.con)
    self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
    # Repo ids (uri:name) seen during the current run
    self.current_base_ids = set()
def __init__(self, name, uri, parsers=None, con=None, meta_ref=None):
    # Indexer of a single repository (uri, name); optionally index
    # commits under a synthetic "meta_ref" reference.
    if not con:
        self.con = index.Connector()
    else:
        self.con = con
    self.c = Commits(self.con)
    # Tags are stored in a dedicated index suffix
    self.t = Tags(
        index.Connector(index=self.con.index, index_suffix='tags'))
    if not os.path.isdir(conf.git_store):
        os.makedirs(conf.git_store)
    self.name = name
    self.uri = uri
    # Unique id of the repository across all projects
    self.base_id = '%s:%s' % (self.uri, self.name)
    self.seen_refs_path = os.path.join(conf.db_cache_path,
                                       SEEN_REFS_CACHED)
    if meta_ref:
        self.meta_ref = 'meta_ref: %s' % meta_ref
    else:
        self.meta_ref = None
    if not parsers:
        self.parsers = []
    else:
        self.parsers = parsers
    self.parsers_compiled = False
    # Local bare clone path; '/' in the uri is not path-safe
    self.local = os.path.join(conf.git_store, self.name,
                              self.uri.replace('/', '_'))
    if not os.path.isdir(self.local):
        os.makedirs(self.local)
    # Resolve the git credentials helper: explicit absolute path from
    # the configuration first, then default installation pathes.
    self.credentials_helper_path = getattr(
        conf, 'git_credential_helper_path', None)
    if not (self.credentials_helper_path and
            self.credentials_helper_path.startswith('/') and
            os.path.isfile(self.credentials_helper_path)):
        if self.credentials_helper_path:
            logger.warning(
                'Configured git_credential_helper %s not found' % (
                    self.credentials_helper_path))
        self.credentials_helper_path = None
    # Look at the default installation pathes
    if not self.credentials_helper_path:
        self.credentials_helper_path = os.path.join(
            sys.prefix, 'bin', 'repoxplorer-git-credentials-helper')
        if not os.path.isfile(self.credentials_helper_path):
            self.credentials_helper_path = shutil.which(
                'repoxplorer-git-credentials-helper')
        if not self.credentials_helper_path:
            logger.warning(
                'Default repoxplorer-git-credential-helper command '
                'not found')
def bycommits(self, pid=None, tid=None, cid=None, gid=None,
              dfrom=None, dto=None, inc_merge_commit=None,
              inc_repos=None, metadata=None, exc_groups=None,
              inc_repos_detail=None, inc_groups=None):
    """Delegate to gbycommits with the resolved filter set."""
    commits = Commits(index.Connector())
    projects_index = Projects()
    contributors = Contributors()
    filters = utils.resolv_filters(projects_index, contributors,
                                   pid, tid, cid, gid, dfrom, dto,
                                   inc_repos, inc_merge_commit,
                                   metadata, exc_groups, inc_groups)
    return self.gbycommits(commits, projects_index, filters,
                           inc_repos_detail)
def search_authors(self, query=""):
    """Free-text search of commit authors by name.

    :returns: an OrderedDict sorted by name, keyed by encrypted
              contributor id
    """
    ret_limit = 100
    c = Commits(index.Connector())
    # Full-text query on the author_name field only
    ret = c.es.search(index=c.index, q=query, df="author_name",
                      size=10000, default_operator="AND",
                      _source_includes=["author_name", "author_email"])
    ret = ret['hits']['hits']
    if not len(ret):
        return {}
    idents = Contributors()
    authors = dict([(d['_source']['author_email'],
                     d['_source']['author_name']) for d in ret])
    result = {}
    # NOTE(review): ret_limit truncates the candidate emails, not the
    # final results amount -- confirm intended.
    _idents = idents.get_idents_by_emails(
        list(authors.keys())[:ret_limit])
    for iid, ident in _idents.items():
        email = ident['default-email']
        # Fall back on the raw commit author name when no ident name
        name = ident['name'] or authors[email]
        result[utils.encrypt(xorkey, iid)] = {
            'name': name,
            'gravatar':
                hashlib.md5(email.encode(errors='ignore')).hexdigest()
        }
    result = OrderedDict(
        sorted(list(result.items()), key=lambda t: t[1]['name']))
    return result
def contributor(self, cid=None):
    """Return summary infos (name, mails/projects/repos amounts,
    gravatar) for the contributor designated by its encrypted cid.

    :raises 404: when cid is missing or cannot be decrypted
    """
    if not cid:
        abort(404, detail="No contributor specified")
    try:
        cid = utils.decrypt(xorkey, cid)
    except Exception:
        abort(404, detail="The cid is incorrectly formated")
    c = Commits(index.Connector())
    idents = Contributors()
    projects = Projects()
    _, ident = idents.get_ident_by_id(cid)
    if not ident:
        # No ident has been declared for that contributor
        # Fix: dict_values is not subscriptable on Python 3; wrap in
        # list() as done by the sibling endpoint.
        ident = list(idents.get_idents_by_emails(cid).values())[0]
    mails = ident['emails']
    name = ident['name']
    if not name:
        raw_names = c.get_commits_author_name_by_emails([cid])
        if cid not in raw_names:
            # TODO: get_commits_author_name_by_emails must
            # support look by committer email too
            name = 'Unnamed'
        else:
            name = raw_names[cid]
    p_filter = {}
    query_kwargs = {
        'mails': mails,
        'merge_commit': False,
        'repos': p_filter,
    }
    tops_ctl = tops.TopProjectsController()
    top_projects = tops_ctl.gbycommits(c, projects, query_kwargs, False)
    top_repos = tops_ctl.gbycommits(c, projects, query_kwargs, True)
    infos = {}
    infos['name'] = name
    infos['mails_amount'] = len(mails)
    infos['projects_amount'] = len(top_projects)
    infos['repos_amount'] = len(top_repos)
    # Fix: hashlib.md5 requires bytes on Python 3; encode the email
    # (ignoring undecodable chars) as done by the other endpoints.
    infos['gravatar'] = hashlib.md5(
        ident['default-email'].encode(errors='ignore')).hexdigest()
    return infos
class RefsCleaner():
    """Remove indexed commits of refs no longer declared in projects.

    NOTE(review): this version relies on the Python-2-only ``file``
    builtin (and presumably ``cPickle``); it will not run on
    Python 3 -- confirm which interpreter this module targets.
    """
    def __init__(self, projects, con=None, config=None):
        if config:
            configuration.set_config(config)
        if not con:
            self.con = index.Connector()
        else:
            self.con = con
        self.projects = projects
        self.c = Commits(self.con)
        self.seen_refs_path = os.path.join(conf.db_path,
                                           SEEN_REFS_CACHED)

    def find_refs_to_clean(self):
        # Refs currently declared in the projects definition
        prjs = self.projects.get_projects_raw()
        refs_ids = set()
        for pid, pdata in prjs.items():
            for rid, repo in pdata['repos'].items():
                for branch in repo['branches']:
                    refs_ids.add('%s:%s:%s' % (repo['uri'], rid, branch))
        # Refs previously seen (cached on disk)
        if not os.path.isfile(self.seen_refs_path):
            self.data = set()
        else:
            try:
                self.data = cPickle.load(file(self.seen_refs_path))
            except Exception:
                # Protect against corrupted file
                self.data = set()
        # Anything seen before but no longer declared must be cleaned
        refs_to_clean = self.data - refs_ids
        logger.info("Found %s refs to clean." % len(refs_to_clean))
        return refs_to_clean

    def clean(self, refs):
        for ref in refs:
            # Find ref's Commits
            ids = [
                c['_id'] for c in
                self.c.get_commits(repos=[ref], scan=True)
            ]
            if not ids:
                self.remove_from_seen_refs(ref)
                continue
            logger.info("Ref %s no longer referenced. Cleaning %s cmts."
                        % (ref, len(ids)))
            # Do it by bulk of 10000 to not hurt memory
            bulk = 10000
            i = 0
            while True:
                _ids = ids[i:i + bulk]
                if not _ids:
                    break
                else:
                    delete_commits(self.c, ref, _ids, ref)
                i += bulk
            self.remove_from_seen_refs(ref)

    def remove_from_seen_refs(self, ref_id):
        # Persist the removal immediately
        self.data.remove(ref_id)
        cPickle.dump(self.data, file(self.seen_refs_path, 'w'))
def diff(self, pid=None, tid=None, cid=None, gid=None, dfrom=None,
         dto=None, dfromref=None, dtoref=None, inc_merge_commit=None,
         inc_repos=None, metadata=None, exc_groups=None, limit=None,
         inc_groups=None):
    """Return the new contributors of [dfrom, dto]: authors present in
    that period but absent from the reference period
    [dfromref, dtoref].
    """
    if not dfrom or not dto:
        abort(404, detail="Must specify dfrom and dto dates for the new "
                          "contributors")
    if not dfromref or not dtoref:
        abort(404, detail="Must specify dfromref and dtoref dates for the "
                          "reference period to compute new contributors")
    # Get contributors for the new period
    c = Commits(index.Connector())
    projects_index = Projects()
    idents = Contributors()
    query_kwargs = utils.resolv_filters(
        projects_index, idents, pid, tid, cid, gid, dfrom, dto,
        inc_repos, inc_merge_commit, metadata, exc_groups, inc_groups)
    # top=-1: fetch all authors; names resolved later, only for the
    # returned page
    authors_new = self.gbycommits(
        c, idents, query_kwargs, top=-1, resolv_name=False,
        clean_email=False)
    # Now get contributors for the old reference period
    query_kwargs = utils.resolv_filters(
        projects_index, idents, pid, tid, cid, gid, dfromref, dtoref,
        inc_repos, inc_merge_commit, metadata, exc_groups, inc_groups)
    authors_old = self.gbycommits(
        c, idents, query_kwargs, top=-1, resolv_name=False,
        clean_email=False)
    # And compute the difference
    cids_new = set([auth['cid'] for auth in authors_new]) - \
        set([auth['cid'] for auth in authors_old])
    authors_diff = [author for author in authors_new
                    if author['cid'] in cids_new]
    if limit is None:
        limit = 10
    else:
        limit = int(limit)
    # If limit set to a negative value all results will be returned
    if limit >= 0:
        authors_diff = authors_diff[:limit]
    self.resolv_name(c, authors_diff)
    return authors_diff
def setUpClass(cls):
    # Three sample commits, one per author, used to exercise the
    # CommitsAmountTrend computation.
    cls.con = index.Connector(index='repoxplorertest')
    cls.c = Commits(cls.con)
    cls.t = CommitsAmountTrend(cls.con)
    cls.commits = [
        {
            'sha': '3597334f2cb10772950c97ddf2f6cc17b184',
            'author_date': 1410456005,
            'committer_date': 1410456010,
            'ttl': 5,
            'author_name': 'Nakata Daisuke',
            'committer_name': 'Nakata Daisuke',
            'author_email': '*****@*****.**',
            'committer_email': '*****@*****.**',
            'repos': [
                'https://github.com/nakata/monkey.git:monkey:master',
            ],
            'line_modifieds': 10,
            'merge_commit': False,
            'commit_msg': 'Add init method',
        },
        {
            'sha': '3597334f2cb10772950c97ddf2f6cc17b185',
            'author_date': 1410457005,
            'committer_date': 1410457005,
            'ttl': 0,
            'author_name': 'Keiko Amura',
            'committer_name': 'Keiko Amura',
            'author_email': '*****@*****.**',
            'committer_email': '*****@*****.**',
            'repos': [
                'https://github.com/nakata/monkey.git:monkey:master',
            ],
            'line_modifieds': 100,
            'merge_commit': False,
            'commit_msg': 'Merge "Fix sanity unittest"',
        },
        {
            'sha': '3597334f2cb10772950c97ddf2f6cc17b186',
            'author_date': 1410458005,
            'committer_date': 1410458005,
            'ttl': 0,
            'author_name': 'Jean Bon',
            'committer_name': 'Jean Bon',
            'author_email': '*****@*****.**',
            'committer_email': '*****@*****.**',
            'repos': [
                'https://github.com/nakata/monkey.git:monkey:master',
            ],
            'line_modifieds': 200,
            'merge_commit': False,
            'commit_msg': 'Add request customer feature 19',
        },
    ]
    cls.c.add_commits(cls.commits)
def commits(self, pid=None, tid=None, cid=None, gid=None,
            dfrom=None, dto=None, inc_merge_commit=None,
            inc_repos=None, metadata=None, exc_groups=None,
            inc_groups=None):
    """Commits amount histogram for the resolved filter set."""
    projects_index = Projects()
    idents = Contributors()
    query_kwargs = utils.resolv_filters(
        projects_index, idents, pid, tid, cid, gid, dfrom, dto,
        inc_repos, inc_merge_commit, metadata, exc_groups, inc_groups)
    c = Commits(index.Connector())
    if not c.get_commits_amount(**query_kwargs):
        # Short-circuit: no commit matches the filters
        return []
    buckets = c.get_commits_histo(**query_kwargs)[1]
    out = []
    for bucket in buckets:
        out.append({'date': bucket['key_as_string'],
                    'value': bucket['doc_count']})
    return out
def __init__(self, projects, con=None):
    """Set up commits/tags accessors against the configured index."""
    self.con = con if con else index.Connector()
    self.projects = projects
    self.c = Commits(self.con)
    # Tags live in a dedicated index suffix alongside the commits index
    self.t = Tags(index.Connector(
        index=self.con.index, index_suffix='tags'))
    self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
    # Repo ids (uri:name) seen during the current run
    self.current_base_ids = set()
def metadata(self, key=None, pid=None, tid=None, cid=None, gid=None,
             dfrom=None, dto=None, inc_merge_commit=None,
             inc_repos=None, exc_groups=None, inc_groups=None):
    """Return the metadata keys (no key given), or the values of the
    given metadata key, for the resolved filter set.
    """
    c = Commits(index.Connector())
    projects_index = Projects()
    idents = Contributors()
    query_kwargs = utils.resolv_filters(
        projects_index, idents, pid, tid, cid, gid, dfrom, dto,
        inc_repos, inc_merge_commit, None, exc_groups, inc_groups)
    # The metadata filter must not constrain this aggregation
    del query_kwargs['metadata']
    if not key:
        keys = c.get_metadata_keys(**query_kwargs)
        return keys
    else:
        vals = c.get_metadata_key_values(key, **query_kwargs)
        return vals
def __init__(self, name, uri, parsers=None, con=None, meta_ref=None):
    # Indexer of a single repository (uri, name); optionally index
    # commits under a synthetic "meta_ref" reference.
    if not con:
        self.con = index.Connector()
    else:
        self.con = con
    self.c = Commits(self.con)
    # Tags are stored in a dedicated index suffix
    self.t = Tags(index.Connector(
        index=self.con.index, index_suffix='tags'))
    if not os.path.isdir(conf.git_store):
        os.makedirs(conf.git_store)
    self.name = name
    self.uri = uri
    # Unique id of the repository across all projects
    self.base_id = '%s:%s' % (self.uri, self.name)
    self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
    if meta_ref:
        self.meta_ref = 'meta_ref: %s' % meta_ref
    else:
        self.meta_ref = None
    if not parsers:
        self.parsers = []
    else:
        self.parsers = parsers
    self.parsers_compiled = False
    # Local bare clone path; '/' in the uri is not path-safe
    self.local = os.path.join(conf.git_store, self.name,
                              self.uri.replace('/', '_'))
    if not os.path.isdir(self.local):
        os.makedirs(self.local)
    # Resolve the git credentials helper: explicit absolute path from
    # the configuration first, then default installation pathes.
    self.credentials_helper_path = getattr(
        conf, 'git_credential_helper_path', None)
    if not (self.credentials_helper_path and
            self.credentials_helper_path.startswith('/') and
            os.path.isfile(self.credentials_helper_path)):
        if self.credentials_helper_path:
            logger.warning(
                'Configured git_credential_helper %s not found' % (
                    self.credentials_helper_path))
        self.credentials_helper_path = None
    # Look at the default installation pathes
    if not self.credentials_helper_path:
        self.credentials_helper_path = os.path.join(
            sys.prefix, 'bin', 'repoxplorer-git-credentials-helper')
        if not os.path.isfile(self.credentials_helper_path):
            self.credentials_helper_path = shutil.which(
                'repoxplorer-git-credentials-helper')
        if not self.credentials_helper_path:
            logger.warning(
                'Default repoxplorer-git-credential-helper command '
                'not found')
def setUpClass(cls):
    """Index the sample commits and declare the test project layout."""
    cls.con = index.Connector(index='repoxplorertest')
    cls.c = Commits(cls.con)
    cls.c.add_commits(COMMITS)
    cls.projects = {
        'test': {
            'repos': [{
                'uri': 'https://github.com/nakata/monkey.git',
                'name': 'monkey',
                'branch': 'master'
            }]
        }
    }
class CommitsAmountTrend(object):
    """Compute the commits-amount evolution between two periods."""

    def __init__(self, connector=None):
        # Commits accessor bound to the given index connector
        self.ic = Commits(connector)

    def get_trend(self, mails=None, repos=None,
                  period_a=None, period_b=None,
                  merge_commit=None):
        """ Return the amount diff and the percentil of
        amount evolution for perdiod a compared to period b

        :param mails: author emails filter (default: no filter)
        :param repos: repositories filter (default: no filter)
        :param period_a, period_b: (from, to) date tuples
        :param merge_commit: whether merge commits are included
        :returns: (diff, trend) tuple
        """
        # Avoid mutable default arguments; keep the [] semantics
        # for existing callers.
        mails = [] if mails is None else mails
        repos = [] if repos is None else repos
        assert isinstance(period_a, tuple)
        assert isinstance(period_b, tuple)
        c_amnt_a = self.ic.get_commits_amount(mails, repos,
                                              period_a[0], period_a[1],
                                              merge_commit)
        c_amnt_b = self.ic.get_commits_amount(mails, repos,
                                              period_b[0], period_b[1],
                                              merge_commit)
        diff = c_amnt_a - c_amnt_b
        # Fix: guard against ZeroDivisionError when both periods
        # contain no commit at all (diff is 0 then).
        if not (c_amnt_a or c_amnt_b):
            return diff, 0
        trend = diff * 100 / (c_amnt_a or c_amnt_b)
        return diff, trend
def __init__(self, name, uri, parsers=None, con=None, config=None):
    """Prepare the local clone directory and the index accessors."""
    if config:
        configuration.set_config(config)
    self.con = con if con else index.Connector()
    self.c = Commits(self.con)
    self.t = Tags(self.con)
    if not os.path.isdir(conf.git_store):
        os.makedirs(conf.git_store)
    self.name = name
    self.uri = uri
    # Unique id of the repository across all projects
    self.base_id = '%s:%s' % (self.uri, self.name)
    self.parsers = parsers if parsers else []
    self.parsers_compiled = False
    # '/' in the uri is not path-safe for the local clone directory
    self.local = os.path.join(conf.git_store, self.name,
                              self.uri.replace('/', '_'))
    if not os.path.isdir(self.local):
        os.makedirs(self.local)
ret = [] for i in range(amount): author_date = random.randint( epoch_start, epoch_start + 1000000) author = emails[random.randint(0, email_amount - 1)] committer = emails[random.randint(0, email_amount - 1)] c = {} c['sha'] = hashlib.sha256(create_random_str(10)).hexdigest() c['author_name'] = author[0] c['committer_name'] = committer[0] c['author_email'] = author[1] c['committer_email'] = committer[1] c['author_date'] = author_date c['committer_date'] = random.randint( author_date + 1, author_date + 10000) c['ttl'] = random.randint(0, 10000) c['commit_msg'] = gen_commit_msg() c['line_modifieds'] = random.randint(0, 10000) c['merge_commit'] = False c['projects'] = [project, ] ret.append(c) print("Generation of %s fake commits done." % amount) return ret if __name__ == '__main__': amount = 100000 c = Commits(index.Connector()) c.add_commits(gen_fake_commits(amount)) print("Indexation done.")
class RepoIndexer():
    """Index a Git repository's commits and tags into the backend.

    Workflow: git_init/git_fetch_branch to mirror the repo locally,
    compute_to_index_to_delete to diff upstream history against the
    index, then index()/index_tags() to apply the changes.

    NOTE(review): save_seen_ref_in_cache uses the Python-2-only
    ``file`` builtin, and get_heads/get_tags store lazy ``filter``
    objects that index_tags later passes to len() -- this code
    presumably targets Python 2; confirm.
    """
    def __init__(self, name, uri, parsers=None, con=None, config=None):
        if config:
            configuration.set_config(config)
        if not con:
            self.con = index.Connector()
        else:
            self.con = con
        self.c = Commits(self.con)
        self.t = Tags(self.con)
        if not os.path.isdir(conf.git_store):
            os.makedirs(conf.git_store)
        self.name = name
        self.uri = uri
        # Unique id of the repository across all projects
        self.base_id = '%s:%s' % (self.uri, self.name)
        self.seen_refs_path = os.path.join(conf.db_path,
                                           SEEN_REFS_CACHED)
        if not parsers:
            self.parsers = []
        else:
            self.parsers = parsers
        self.parsers_compiled = False
        # Local bare clone path; '/' in the uri is not path-safe
        self.local = os.path.join(conf.git_store, self.name,
                                  self.uri.replace('/', '_'))
        if not os.path.isdir(self.local):
            os.makedirs(self.local)
        self.credentials_helper_path = os.path.join(
            sys.prefix, 'bin', 'repoxplorer-git-credentials-helper')

    def __str__(self):
        return 'Git indexer of %s' % self.ref_id

    def save_seen_ref_in_cache(self):
        # Keep a cache a each ref that have been indexed
        # This is use later to discover seen refs no longer in projects.yaml
        # In that case a removal from the backend will be performed
        logger.debug("Save ref %s into seen_refs file" % self.ref_id)
        if not os.path.isfile(self.seen_refs_path):
            data = set()
        else:
            try:
                data = cPickle.load(file(self.seen_refs_path))
            except Exception:
                # Protect against corrupted file
                data = set()
        data.add(self.ref_id)
        cPickle.dump(data, file(self.seen_refs_path, 'w'))

    def set_branch(self, branch):
        # The working ref id is uri:name:branch
        self.branch = branch
        self.ref_id = '%s:%s:%s' % (self.uri, self.name, self.branch)
        self.save_seen_ref_in_cache()

    def git_init(self):
        logger.debug("Git init for %s:%s in %s" % (
            self.uri, self.name, self.local))
        run(["git", "init", "--bare", "."], self.local)
        if "origin" not in run(["git", "remote", "-v"], self.local):
            run(["git", "remote", "add", "origin", self.uri],
                self.local)

    def git_fetch_branch(self):
        logger.debug("Fetch %s %s:%s" % (
            self.name, self.uri, self.branch))
        run([
            "git", "-c",
            "credential.helper=%s" % self.credentials_helper_path,
            "fetch", "-nk", "origin",
            "+%s:%s" % (self.branch, self.branch)
        ], self.local)

    def get_refs(self):
        refs = run([
            "git", "-c",
            "credential.helper=%s" % self.credentials_helper_path,
            "ls-remote", "origin"
        ], self.local).splitlines()
        self.refs = []
        for r in refs:
            # Each entry is a (sha, refname) pair
            self.refs.append(r.split('\t'))

    def get_heads(self):
        self.heads = filter(lambda x: x[1].startswith('refs/heads/'),
                            self.refs)

    def get_tags(self):
        self.tags = filter(lambda x: x[1].startswith('refs/tags/'),
                           self.refs)

    def git_get_commit_obj(self):
        self.commits = get_all_shas(self.local)

    def run_workers(self, shas, workers):
        # Split the shas list into chunks and process them in a pool
        BULK_CHUNK = 1000
        to_process = []
        if workers == 0:
            # Default value (auto)
            workers = mp.cpu_count() - 1 or 1
        while True:
            try:
                shas[BULK_CHUNK]
                to_process.append(shas[:BULK_CHUNK])
                del shas[:BULK_CHUNK]
            except IndexError:
                # Add the rest
                to_process.append(shas)
                break
        options = [(self.local, self.ref_id, stp)
                   for stp in to_process]
        worker_pool = mp.Pool(workers)
        worker_pool.map(process_commits, options)
        worker_pool.terminate()
        worker_pool.join()

    def is_branch_fully_indexed(self):
        # The branch is fully indexed when its tip commit is in the
        # index and references this ref id
        branch = [
            head for head in self.heads
            if head[1].endswith(self.branch)
        ][0]
        branch_tip_sha = branch[0]
        cmt = self.c.get_commit(branch_tip_sha, silent=True)
        if cmt and self.ref_id in cmt['repos']:
            return True
        return False

    def get_current_commit_indexed(self):
        """ Fetch from the index commits mentionned for
        this repo and branch.
        """
        self.already_indexed = [
            c['_id'] for c in
            self.c.get_commits(repos=[self.ref_id], scan=True)
        ]
        logger.debug(
            "%s: In the DB - repo history is composed of %s commits."
            % (self.name, len(self.already_indexed)))

    def compute_to_index_to_delete(self):
        """ Compute the list of commits (sha) to index and
        the list to delete from the index.
        """
        logger.debug(
            "%s: Upstream - repo history is composed of %s commits."
            % (self.name, len(self.commits)))
        self.to_delete = set(self.already_indexed) - set(self.commits)
        self.to_index = set(self.commits) - set(self.already_indexed)
        logger.debug("%s: Indexer will reference %s commits."
                     % (self.name, len(self.to_index)))
        logger.debug("%s: Indexer will dereference %s commits."
                     % (self.name, len(self.to_delete)))

    def compute_to_create_to_update(self):
        # Among shas to index, split those already known to the index
        # (to_update: just add this ref to their repos field) from the
        # really new ones (to_create: full extraction needed).
        if self.to_index:
            res = self.c.get_commits_by_id(list(self.to_index))
            to_update = [
                c['_source'] for c in res['docs']
                if c['found'] is True
            ]
            to_create = [c['_id'] for c in res['docs']
                         if c['found'] is False]
            return to_create, to_update
        return [], []

    def index_tags(self):
        # Unique id of a tag document: sha + short name + repo id
        def c_tid(t):
            return "%s%s%s" % (
                t['sha'],
                t['name'].replace('refs/tags/', ''),
                t['repo'])
        if not self.tags:
            logger.debug('%s: no tags detected for this repository'
                         % (self.name))
            return
        logger.debug('%s: %s tags exist upstream' % (
            self.name, len(self.tags)))
        tags = self.t.get_tags([self.base_id])
        existing = dict([(c_tid(t['_source']), t['_id'])
                         for t in tags])
        logger.debug('%s: %s tags already referenced' % (
            self.name, len(existing)))
        # Some commits may be not found because it is possible the branches
        # has not been indexed.
        commits = [
            c['_source'] for c in
            self.c.get_commits_by_id(
                [t[0] for t in self.tags])['docs']
            if c['found']
        ]
        lookup = dict([(c['sha'], c['committer_date'])
                       for c in commits])
        # Referenced tags that no longer exist upstream
        to_delete = [
            v for k, v in existing.items() if k not in [
                "%s%s%s" % (
                    sha,
                    name.replace('refs/tags/', '').replace('^{}', ''),
                    self.base_id)
                for sha, name in self.tags
            ]
        ]
        docs = []
        for sha, name in self.tags:
            if sha in lookup:
                doc = {}
                doc['name'] = name.replace(
                    'refs/tags/', '').replace('^{}', '')
                doc['sha'] = sha
                # Tag date is the committer date of the tagged commit
                doc['date'] = lookup[sha]
                doc['repo'] = self.base_id
                if c_tid(doc) in existing:
                    continue
                docs.append(doc)
        if docs:
            logger.info('%s: %s tags will be indexed' % (
                self.name, len(docs)))
            self.t.add_tags(docs)
        if to_delete:
            logger.info('%s: %s tags will be deleted' % (
                self.name, len(to_delete)))
            self.t.del_tags(to_delete)

    def index(self, extract_workers=1):
        # Compile the parsers
        if self.parsers:
            if not self.parsers_compiled:
                raw_parsers = copy.deepcopy(self.parsers)
                self.parsers = []
                for parser in raw_parsers:
                    self.parsers.append(re.compile(parser))
                logger.debug(
                    "%s: Prepared %s regex parsers for commit msgs"
                    % (self.name, len(self.parsers)))
                self.parsers_compiled = True
        # check whether a commit should be completly deleted or
        # updated by removing the repo from the repos field
        if self.to_delete:
            delete_commits(self.c, self.name, self.to_delete,
                           self.ref_id)
        # check whether a commit should be created or
        # updated by adding the repo into the repos field
        if self.to_index:
            to_create, to_update = self.compute_to_create_to_update()
            if to_create:
                logger.info("%s: %s commits will be created ..." % (
                    self.name, len(to_create)))
                self.run_workers(to_create, extract_workers)
            if to_update:
                logger.info(
                    "%s: %s commits already indexed and need "
                    "to be updated"
                    % (self.name, len(to_update)))
                for c in to_update:
                    c['repos'].append(self.ref_id)
                self.c.update_commits(to_update)
class RefsCleaner():
    """Purge indexed data for refs no longer referenced by any project.

    Compares the set of refs previously seen by the indexer (persisted
    in a pickle cache file) against the refs currently declared in the
    projects definition, then removes stale commits and tags from the
    backend and updates the cache.
    """

    def __init__(self, projects, con=None):
        """
        :param projects: projects index accessor (provides get_projects).
        :param con: optional index connector; a default one is created
            when None.
        """
        if not con:
            self.con = index.Connector()
        else:
            self.con = con
        self.projects = projects
        self.c = Commits(self.con)
        self.t = Tags(index.Connector(
            index=self.con.index, index_suffix='tags'))
        self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
        self.current_base_ids = set()

    def find_refs_to_clean(self):
        """Return the set of cached refs that no project references."""
        projects = self.projects.get_projects(source=['refs'])
        refs_ids = set()
        for project in projects.values():
            for ref in project['refs']:
                self.current_base_ids.add(ref['shortrid'])
                refs_ids.add(ref['fullrid'])
        if not os.path.isfile(self.seen_refs_path):
            self.data = set()
        else:
            try:
                with open(self.seen_refs_path, 'rb') as f:
                    self.data = pickle.load(f)
            except Exception:
                # Protect against corrupted file
                self.data = set()
        refs_to_clean = self.data - refs_ids
        if len(refs_to_clean):
            logger.info("Found %s refs to clean." % len(refs_to_clean))
        return refs_to_clean

    def clean_tags(self, base_id):
        """Delete all tags attached to a no-longer-referenced repo."""
        # Tags are indexed by repos (base_id) not by ref (ref_id)
        tags = self.t.get_tags([base_id])
        ids = [t['_id'] for t in tags]
        if ids:
            logger.info("Repo %s no longer referenced. Cleaning %s tags" % (
                base_id, len(ids)))
            self.t.del_tags(ids)

    def clean_ref_cmts(self, ref):
        """Dereference/delete all commits attached to a stale ref."""
        # Find ref's Commits
        ids = [c['_id'] for c in self.c.get_commits(repos=[ref], scan=True)]
        if not ids:
            self.remove_from_seen_refs(ref)
            return
        logger.info("Ref %s no longer referenced. Cleaning %s cmts."
                    % (ref, len(ids)))
        # Do it by bulk of 10000 to not hurt memory
        bulk = 10000
        i = 0
        while True:
            _ids = ids[i:i + bulk]
            if not _ids:
                break
            delete_commits(self.c, ref, _ids, ref)
            i += bulk

    def clean(self, refs):
        """Clean commits for each stale ref, then tags for repos whose
        base id is no longer referenced by any project."""
        base_ids = set()
        for ref in refs:
            self.clean_ref_cmts(ref)
            self.remove_from_seen_refs(ref)
            base_id = ref.replace(":%s" % ref.split(':')[-1], "")
            if base_id not in self.current_base_ids:
                base_ids.add(base_id)
        for base_id in base_ids:
            self.clean_tags(base_id)

    def remove_from_seen_refs(self, ref_id):
        """Drop ref_id from the seen-refs cache and persist the cache.

        Uses discard() rather than remove(): clean() calls this after
        clean_ref_cmts(), which may already have removed the ref (the
        empty-commits path), and remove() would then raise KeyError.
        """
        self.data.discard(ref_id)
        with open(self.seen_refs_path, 'wb') as f:
            pickle.dump(self.data, f)
    def setUpClass(cls):
        """Index a fixed set of fake commits used by this test class.

        The fixture covers: a regular commit with metadata and a files
        list, a merge commit, commits carrying 'implement-feature' /
        'implement-partial-epic' metadata, a commit shared by two refs,
        and one with a very old date (epoch-near timestamps).
        """
        cls.con = index.Connector(index='repoxplorertest')
        cls.c = Commits(cls.con)
        # NOTE(review): shas are shortened fake values, not real 40-char
        # git shas. 'ttl' looks like committer_date - author_date (5 for
        # the first commit) — confirm against the indexer's contract.
        cls.commits = [
            {
                'sha': '3597334f2cb10772950c97ddf2f6cc17b184',
                'author_date': 1410456005,
                'committer_date': 1410456010,
                'ttl': 5,
                'author_name': 'Nakata Daisuke',
                'committer_name': 'Nakata Daisuke',
                'author_email': '*****@*****.**',
                'author_email_domain': 'joker.org',
                'committer_email': '*****@*****.**',
                'repos': [
                    'https://github.com/nakata/monkey.git:monkey:master', ],
                'line_modifieds': 10,
                'merge_commit': False,
                'commit_msg': 'Add init method',
                'implement-partial-epic': ['Great Feature', ],
                'files_list': [
                    'ichi/',
                    'ichi/ni/kuruma.sh',
                    'ichi/ni/san/',
                    'ichi/ni/san/tamago.txt'],
            },
            {
                # Merge commit (merge_commit True) on another repo.
                'sha': '3597334f2cb10772950c97ddf2f6cc17b185',
                'author_date': 1410457005,
                'committer_date': 1410457005,
                'ttl': 0,
                'author_name': 'Keiko Amura',
                'committer_name': 'Keiko Amura',
                'author_email': '*****@*****.**',
                'author_email_domain': 'hanabi.org',
                'committer_email': '*****@*****.**',
                'repos': [
                    'https://github.com/amura/kotatsu.git:kotatsu:master', ],
                'line_modifieds': 100,
                'merge_commit': True,
                'commit_msg': 'Merge "Fix sanity unittest"',
                'files_list': [],
            },
            {
                'sha': '3597334f2cb10772950c97ddf2f6cc17b186',
                'author_date': 1410458005,
                'committer_date': 1410458005,
                'ttl': 0,
                'author_name': 'Jean Bon',
                'committer_name': 'Jean Bon',
                'author_email': '*****@*****.**',
                'author_email_domain': 'joker.org',
                'committer_email': '*****@*****.**',
                'repos': [
                    'https://github.com/nakata/monkey.git:monkey:master', ],
                'line_modifieds': 200,
                'merge_commit': False,
                'commit_msg': 'Add request customer feature 19',
                'implement-feature': ['19', ],
                'files_list': [
                    'monkey/',
                    'monkey/__init__.py',
                    'ichi/',
                    'ichi/ni/hikoki.asm'],
            },
            {
                # Carries both feature and partial-epic metadata.
                'sha': '3597334f2cb10772950c97ddf2f6cc17b187',
                'author_date': 1410459005,
                'committer_date': 1410459005,
                'ttl': 0,
                'author_name': 'Jean Bon',
                'committer_name': 'Jean Bon',
                'author_email': '*****@*****.**',
                'author_email_domain': 'joker.org',
                'committer_email': '*****@*****.**',
                'repos': [
                    'https://github.com/nakata/monkey.git:monkey:master', ],
                'line_modifieds': 300,
                'merge_commit': False,
                'commit_msg': 'Add request customer feature 20',
                'implement-feature': ['20', ],
                'implement-partial-epic': ['Great Feature', ],
                'files_list': [],
            },
            {
                # Single commit referenced by two refs (master and devel).
                'sha': '3597334f2cb10772950c97ddf2f6cc17b188',
                'author_date': 1410460005,
                'committer_date': 1410460005,
                'ttl': 0,
                'author_name': 'Jean Bon',
                'committer_name': 'Jean Bon',
                'author_email': '*****@*****.**',
                'author_email_domain': 'joker.org',
                'committer_email': '*****@*****.**',
                'repos': [
                    'https://github.com/amura/kotatsu.git:kotatsu:master',
                    'https://github.com/amura/kotatsu.git:kotatsu:devel'],
                'line_modifieds': 400,
                'merge_commit': False,
                'commit_msg': 'Add request customer feature 21',
                'files_list': [],
            },
            {
                'sha': '3597334f2cb10772950c97ddf2f6cc17b189',
                'author_date': 1410461005,
                'committer_date': 1410461005,
                'ttl': 0,
                'author_name': 'Jean Bon',
                'committer_name': 'Jean Bon',
                'author_email': '*****@*****.**',
                'author_email_domain': 'joker.org',
                'committer_email': '*****@*****.**',
                'repos': [
                    'https://github.com/amura/kotatsu.git:kotatsu:devel', ],
                'line_modifieds': 400,
                'merge_commit': False,
                'commit_msg': 'Add request customer feature 22',
                'files_list': [],
            },
            {
                'sha': '3597334f2cb10772950c97ddf2f6cc17b190',
                'author_date': 1410491005,
                'committer_date': 1410491005,
                'ttl': 0,
                'author_name': 'Jean Bon',
                'committer_name': 'Jean Bon',
                'author_email': '*****@*****.**',
                'author_email_domain': 'joker.org',
                'committer_email': '*****@*****.**',
                'repos': [
                    'https://github.com/amura/kotatsu.git:kotatsu:devel', ],
                'line_modifieds': 400,
                'merge_commit': False,
                'commit_msg': 'Add request customer feature 23',
                'files_list': [],
            },
            {
                # Near-epoch dates to exercise date-range boundaries.
                'sha': '3597334f2cb10772950c97ddf2f6cc17b191',
                'author_date': 46400,
                'committer_date': 46400,
                'ttl': 0,
                'author_name': 'Marty Junior',
                'committer_name': 'Marty Junior',
                'author_email': '*****@*****.**',
                'author_email_domain': 'joker.org',
                'committer_email': '*****@*****.**',
                'repos': [
                    'https://github.com/amura/kotatsu.git:kotatsu:devel', ],
                'line_modifieds': 400,
                'merge_commit': False,
                'commit_msg': 'Add request customer feature 23',
                'files_list': [],
            }
        ]
        cls.c.add_commits(cls.commits)
class RefsCleaner():
    """Purge indexed data for refs no longer referenced by any project.

    Compares the set of refs previously seen by the indexer (persisted
    in a pickle cache file) against the refs currently declared in the
    projects definition, then removes stale commits and tags from the
    backend and updates the cache.
    """

    def __init__(self, projects, con=None):
        """
        :param projects: projects index accessor (provides get_projects).
        :param con: optional index connector; a default one is created
            when None.
        """
        if not con:
            self.con = index.Connector()
        else:
            self.con = con
        self.projects = projects
        self.c = Commits(self.con)
        self.t = Tags(
            index.Connector(index=self.con.index, index_suffix='tags'))
        self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
        self.current_base_ids = set()

    def find_refs_to_clean(self):
        """Return the set of cached refs that no project references."""
        projects = self.projects.get_projects(source=['refs'])
        refs_ids = set()
        for project in projects.values():
            for ref in project['refs']:
                self.current_base_ids.add(ref['shortrid'])
                refs_ids.add(ref['fullrid'])
        if not os.path.isfile(self.seen_refs_path):
            self.data = set()
        else:
            try:
                with open(self.seen_refs_path, 'rb') as f:
                    self.data = pickle.load(f)
            except Exception:
                # Protect against corrupted file
                self.data = set()
        refs_to_clean = self.data - refs_ids
        if len(refs_to_clean):
            logger.info("Found %s refs to clean." % len(refs_to_clean))
        return refs_to_clean

    def clean_tags(self, base_id):
        """Delete all tags attached to a no-longer-referenced repo."""
        # Tags are indexed by repos (base_id) not by ref (ref_id)
        tags = self.t.get_tags([base_id])
        ids = [t['_id'] for t in tags]
        if ids:
            logger.info("Repo %s no longer referenced. Cleaning %s tags"
                        % (base_id, len(ids)))
            self.t.del_tags(ids)

    def clean_ref_cmts(self, ref):
        """Dereference/delete all commits attached to a stale ref."""
        # Find ref's Commits
        ids = [c['_id'] for c in self.c.get_commits(repos=[ref], scan=True)]
        if not ids:
            self.remove_from_seen_refs(ref)
            return
        logger.info("Ref %s no longer referenced. Cleaning %s cmts."
                    % (ref, len(ids)))
        # Do it by bulk of 10000 to not hurt memory
        bulk = 10000
        i = 0
        while True:
            _ids = ids[i:i + bulk]
            if not _ids:
                break
            delete_commits(self.c, ref, _ids, ref)
            i += bulk

    def clean(self, refs):
        """Clean commits for each stale ref, then tags for repos whose
        base id is no longer referenced by any project."""
        base_ids = set()
        for ref in refs:
            self.clean_ref_cmts(ref)
            self.remove_from_seen_refs(ref)
            base_id = ref.replace(":%s" % ref.split(':')[-1], "")
            if base_id not in self.current_base_ids:
                base_ids.add(base_id)
        for base_id in base_ids:
            self.clean_tags(base_id)

    def remove_from_seen_refs(self, ref_id):
        """Drop ref_id from the seen-refs cache and persist the cache.

        Uses discard() rather than remove(): clean() calls this after
        clean_ref_cmts(), which may already have removed the ref (the
        empty-commits path), and remove() would then raise KeyError.
        """
        self.data.discard(ref_id)
        with open(self.seen_refs_path, 'wb') as f:
            pickle.dump(self.data, f)
class RepoIndexer():
    """Index a Git repository's branches and tags into the backend.

    Maintains a local bare clone under conf.git_store, computes the
    delta between upstream commit shas and what the index already
    holds, then creates/updates/dereferences commit documents and
    synchronizes tag documents.
    """

    def __init__(self, name, uri, parsers=None, con=None, meta_ref=None):
        """Prepare the local bare clone and the index accessors.

        :param name: repository short name.
        :param uri: repository clone URI.
        :param parsers: optional list of regex strings used to extract
            metadata from commit messages (compiled lazily in index()).
        :param con: optional index connector; a default one is created
            when None.
        :param meta_ref: when set, commits are additionally referenced
            under the synthetic 'meta_ref: <name>' ref.
        """
        if not con:
            self.con = index.Connector()
        else:
            self.con = con
        self.c = Commits(self.con)
        self.t = Tags(index.Connector(
            index=self.con.index, index_suffix='tags'))
        if not os.path.isdir(conf.git_store):
            os.makedirs(conf.git_store)
        self.name = name
        self.uri = uri
        # Repo base id '<uri>:<name>'; tags are indexed against it.
        self.base_id = '%s:%s' % (self.uri, self.name)
        self.seen_refs_path = os.path.join(conf.db_path, SEEN_REFS_CACHED)
        if meta_ref:
            self.meta_ref = 'meta_ref: %s' % meta_ref
        else:
            self.meta_ref = None
        if not parsers:
            self.parsers = []
        else:
            self.parsers = parsers
        self.parsers_compiled = False
        self.local = os.path.join(conf.git_store, self.name,
                                  self.uri.replace('/', '_'))
        if not os.path.isdir(self.local):
            os.makedirs(self.local)
        # A configured credential helper is only kept if it is an
        # absolute path to an existing file; otherwise fall back to the
        # default installation locations below.
        self.credentials_helper_path = getattr(
            conf, 'git_credential_helper_path', None)
        if not (self.credentials_helper_path and
                self.credentials_helper_path.startswith('/') and
                os.path.isfile(self.credentials_helper_path)):
            if self.credentials_helper_path:
                logger.warning(
                    'Configured git_credential_helper %s not found' % (
                        self.credentials_helper_path))
            self.credentials_helper_path = None
        # Look at the default installation pathes
        if not self.credentials_helper_path:
            self.credentials_helper_path = os.path.join(
                sys.prefix, 'bin', 'repoxplorer-git-credentials-helper')
            if not os.path.isfile(self.credentials_helper_path):
                self.credentials_helper_path = shutil.which(
                    'repoxplorer-git-credentials-helper')
            if not self.credentials_helper_path:
                logger.warning(
                    'Default repoxplorer-git-credential-helper command '
                    'not found')

    def __str__(self):
        # NOTE(review): only valid after set_branch() defined self.ref_id.
        return 'Git indexer of %s' % self.ref_id

    def save_seen_ref_in_cache(self):
        # Record each indexed ref in a pickle cache file. The cache is
        # used later (by RefsCleaner) to discover seen refs that are no
        # longer in projects.yaml; those are removed from the backend.
        logger.debug("Save ref %s into seen_refs file" % self.ref_id)
        if not os.path.isfile(self.seen_refs_path):
            data = set()
        else:
            try:
                data = pickle.load(open(self.seen_refs_path, 'rb'))
            except Exception:
                # Protect against corrupted file
                data = set()
        data.add(self.ref_id)
        pickle.dump(data, open(self.seen_refs_path, 'wb'))

    def set_branch(self, branch):
        """Select the branch to work on and record the resulting ref id."""
        self.branch = branch
        self.ref_id = '%s:%s:%s' % (self.uri, self.name, self.branch)
        self.save_seen_ref_in_cache()

    def git_init(self):
        """Create the local bare clone (idempotent) and its origin remote."""
        logger.debug("Git init for %s:%s in %s" % (
            self.uri, self.name, self.local))
        run(["git", "init", "--bare", "."], self.local)
        remotes = run(["git", "remote", "-v"], self.local)
        remote_names = [line.split()[0] for line in remotes.splitlines()]
        if "origin" not in remote_names:
            run(["git", "remote", "add", "origin", self.uri], self.local)

    def git_fetch_branch(self):
        """Fetch the selected branch from origin into the bare clone."""
        logger.debug("Fetch %s %s:%s" % (self.name, self.uri, self.branch))
        run(["git", "-c", "credential.helper=%s" %
             self.credentials_helper_path, "fetch", "-nk", "origin",
             "+%s:%s" % (self.branch, self.branch)], self.local)

    def get_refs(self):
        """Fetch the upstream ref list as [sha, refname] pairs."""
        refs = run([
            "git", "-c",
            "credential.helper=%s" % self.credentials_helper_path,
            "ls-remote", "origin"], self.local).splitlines()
        self.refs = []
        for r in refs:
            self.refs.append(r.split('\t'))

    def get_heads(self):
        # Branch heads subset of the refs fetched by get_refs().
        self.heads = [x for x in self.refs if x[1].startswith('refs/heads/')]

    def get_tags(self):
        # Tags subset of the refs fetched by get_refs().
        self.tags = [x for x in self.refs if x[1].startswith('refs/tags/')]

    def git_get_commit_obj(self):
        # All commit shas reachable in the local clone.
        self.commits = get_all_shas(self.local)

    def run_workers(self, shas, workers):
        """Extract and index commits in parallel worker processes.

        NOTE(review): consumes the passed-in `shas` list in place
        (del shas[:BULK_CHUNK]) — callers must not reuse it afterwards.

        :param shas: list of commit shas to process (mutated).
        :param workers: process count; 0 means auto (cpu_count - 1).
        """
        BULK_CHUNK = 1000
        to_process = []
        if workers == 0:
            # Default value (auto)
            workers = mp.cpu_count() - 1 or 1
        while True:
            try:
                # Probe: raises IndexError once fewer than BULK_CHUNK+1
                # shas remain, which terminates the chunking loop.
                shas[BULK_CHUNK]
                to_process.append(shas[:BULK_CHUNK])
                del shas[:BULK_CHUNK]
            except IndexError:
                # Add the rest
                to_process.append(shas)
                break
        ref_ids = [self.ref_id]
        if self.meta_ref:
            ref_ids.append(self.meta_ref)
        options = [
            (self.local, ref_ids, stp) for stp in to_process]
        worker_pool = mp.Pool(workers)
        worker_pool.map(process_commits, options)
        worker_pool.terminate()
        worker_pool.join()

    def is_branch_fully_indexed(self):
        """Return True when the indexed tip matches the upstream branch tip.

        Relies on get_refs()/get_heads() having populated self.heads.
        """
        branch = [head for head in self.heads
                  if head[1].endswith(self.branch)][0]
        branch_tip_sha = branch[0]
        _, _, cmts_list = self.c.get_commits(repos=[self.ref_id], limit=1)
        if not cmts_list:
            return False
        cmt = cmts_list[0]
        if branch_tip_sha != cmt['sha']:
            return False
        return True

    def get_current_commits_indexed(self):
        """ Fetch from the index the commits already referenced for this
        repo and branch.
        """
        self.already_indexed = [c['_id'] for c in
                                self.c.get_commits(repos=[self.ref_id],
                                                   scan=True)]
        logger.debug(
            "%s: In the DB - repo history is composed of %s commits." % (
                self.name, len(self.already_indexed)))

    def compute_to_index_to_delete(self):
        """ Compute the list of commits (sha) to index and the list
        to delete from the index.
        """
        logger.debug(
            "%s: Upstream - repo history is composed of %s commits." % (
                self.name, len(self.commits)))
        self.to_delete = set(self.already_indexed) - set(self.commits)
        self.to_index = set(self.commits) - set(self.already_indexed)
        logger.debug(
            "%s: Indexer will reference %s commits." % (
                self.name, len(self.to_index)))
        logger.debug(
            "%s: Indexer will dereference %s commits." % (
                self.name, len(self.to_delete)))

    def compute_to_create_to_update(self):
        """Split self.to_index into commits to create and to update.

        Returns (to_create, to_update): shas missing from the index vs
        already indexed source documents that only need this repo ref
        appended. Both lists are empty when there is nothing to index.
        """
        if self.to_index:
            res = self.c.get_commits_by_id(list(self.to_index))
            to_update = [c['_source'] for c in res['docs']
                         if c['found'] is True]
            to_create = [c['_id'] for c in res['docs']
                         if c['found'] is False]
            return to_create, to_update
        return [], []

    def index_tags(self):
        """Synchronize the tags index with the tags found upstream."""
        # Composite tag id: sha + tag short name + repo base id.
        def c_tid(t):
            return "%s%s%s" % (t['sha'],
                               t['name'].replace('refs/tags/', ''),
                               t['repo'])
        if not self.tags:
            logger.debug('%s: no tags detected for this repository' % (
                self.name))
            return
        logger.debug('%s: %s tags exist upstream' % (
            self.name, len(self.tags)))
        tags = self.t.get_tags([self.base_id])
        existing = dict([(c_tid(t['_source']), t['_id']) for t in tags])
        logger.debug('%s: %s tags already referenced' % (
            self.name, len(existing)))
        # Some commits may be not found because it is possible the branches
        # has not been indexed.
        commits = [c['_source'] for c in self.c.get_commits_by_id(
            [t[0] for t in self.tags])['docs'] if c['found']]
        lookup = dict([(c['sha'], c['committer_date']) for c in commits])
        # Drop referenced tags whose composite id no longer matches any
        # upstream tag ('^{}' peeled-tag suffixes are stripped first).
        to_delete = [v for k, v in existing.items()
                     if k not in ["%s%s%s" % (
                         sha,
                         name.replace('refs/tags/', '').replace('^{}', ''),
                         self.base_id) for sha, name in self.tags]]
        docs = []
        for sha, name in self.tags:
            # Only index tags whose target commit is known to the index.
            if sha in lookup:
                doc = {}
                doc['name'] = name.replace('refs/tags/', '').replace('^{}', '')
                doc['sha'] = sha
                doc['date'] = lookup[sha]
                doc['repo'] = self.base_id
                if c_tid(doc) in existing:
                    continue
                docs.append(doc)
        if docs:
            logger.info('%s: %s tags will be indexed' % (
                self.name, len(docs)))
            self.t.add_tags(docs)
        if to_delete:
            logger.info('%s: %s tags will be deleted' % (
                self.name, len(to_delete)))
            self.t.del_tags(to_delete)

    def index(self, extract_workers=1):
        """Apply the computed commit delta to the backend index.

        :param extract_workers: worker processes used to extract and
            index new commits (passed through to run_workers).
        """
        # Compile the parsers (only once; raw patterns are kept aside
        # via deepcopy so a second call does not re-compile).
        if self.parsers:
            if not self.parsers_compiled:
                raw_parsers = copy.deepcopy(self.parsers)
                self.parsers = []
                for parser in raw_parsers:
                    self.parsers.append(re.compile(parser))
                logger.debug(
                    "%s: Prepared %s regex parsers for commit msgs" % (
                        self.name, len(self.parsers)))
                self.parsers_compiled = True
        # check whether a commit should be completly deleted or
        # updated by removing the repo from the repos field
        if self.to_delete:
            delete_commits(self.c, self.name, self.to_delete, self.ref_id)
        # check whether a commit should be created or
        # updated by adding the repo into the repos field
        if self.to_index:
            to_create, to_update = self.compute_to_create_to_update()
            if to_create:
                logger.info("%s: %s commits will be created ..." % (
                    self.name, len(to_create)))
                self.run_workers(to_create, extract_workers)
            if to_update:
                logger.info(
                    "%s: %s commits already indexed and need to be updated"
                    % (
                        self.name, len(to_update)))
                for c in to_update:
                    c['repos'].append(self.ref_id)
                self.c.update_commits(to_update)
def setUpClass(cls): cls.con = index.Connector(index='repoxplorertest') cls.c = Commits(cls.con) cls.c.add_commits(COMMITS)
def index(self, prefix=None, nameonly='false', withstats='false', pid=None, dfrom=None, dto=None, inc_merge_commit=None): ci = Commits(index.Connector()) contributors_index = Contributors() groups = contributors_index.get_groups() if withstats == 'true': projects_index = Projects() if nameonly == 'true': ret = dict([(k, None) for k in groups.keys()]) if prefix: ret = dict([(k, None) for k in ret.keys() if k.lower().startswith(prefix)]) return ret ret_groups = {} for group, data in groups.items(): if prefix and not group.lower().startswith(prefix.lower()): continue rg = { 'members': {}, 'description': data.get('description', ''), 'domains': data.get('domains', []) } emails = list(data['emails'].keys()) members = contributors_index.get_idents_by_emails(emails) for id, member in members.items(): member['gravatar'] = hashlib.md5( member['default-email'].encode( errors='ignore')).hexdigest() # TODO(fbo): bounces should be a list of bounce # Let's deactivate that for now # member['bounces'] = bounces del member['emails'] if not member['name']: # Try to find it among commits suggested = ci.get_commits_author_name_by_emails( [member['default-email']]) name = suggested.get(member['default-email'], 'Unnamed') member['name'] = name del member['default-email'] rg['members'][utils.encrypt(xorkey, id)] = member if withstats == 'true': # Fetch the number of projects and repos contributed to query_kwargs = utils.resolv_filters( projects_index, contributors_index, pid, None, None, group, dfrom, dto, None, inc_merge_commit, None, None, None) repos = [ r for r in ci.get_repos(**query_kwargs)[1] if not r.startswith('meta_ref: ') ] projects = utils.get_projects_from_references( projects_index, repos) rg['repos_amount'] = len(repos) rg['projects_amount'] = len(projects) ret_groups[group] = rg return ret_groups
def __init__(self, connector=None): self.ic = Commits(connector)
for i in xrange(amount): author_date = random.randint(epoch_start, epoch_start + 1000000) author = emails[random.randint(0, email_amount - 1)] committer = emails[random.randint(0, email_amount - 1)] c = {} c['sha'] = hashlib.sha256(create_random_str(10)).hexdigest() c['author_name'] = author[0] c['committer_name'] = committer[0] c['author_email'] = author[1] c['committer_email'] = committer[1] c['author_date'] = author_date c['committer_date'] = random.randint(author_date + 1, author_date + 10000) c['ttl'] = random.randint(0, 10000) c['commit_msg'] = gen_commit_msg() c['line_modifieds'] = random.randint(0, 10000) c['merge_commit'] = False c['projects'] = [ project, ] ret.append(c) print "Generation of %s fake commits done." % amount return ret if __name__ == '__main__': amount = 100000 c = Commits(index.Connector()) c.add_commits(gen_fake_commits(amount)) print "Indexation done."