def get_commit(self, key, by='id'): if by == 'id': return Commit(self, key, self._verbose) if by == 'fingerprint': # TODO implement computing fingerprints c_id = self._fingerprints[key] return Commit(self, c_id, self._verbose) return None
def test_commit(data, expected_date, is_pass):
    """Check ``Commit.get_date()`` for one input case.

    When ``is_pass`` is true the date returned for ``data`` must equal
    ``expected_date``; otherwise building the commit or reading its date
    must raise.
    """
    if is_pass:
        # Shouldn't get an exception
        commit = Commit(data)
        assert commit.get_date() == expected_date, "Date or the format did not match " \
                                                   "the expected date: %s" % expected_date
        return

    # Expecting an exception
    with pytest.raises(Exception) as e_info:
        commit = Commit(data)
        commit.get_date()
def test_compute_weight():
    """Check ``compute_historical_weight`` on growing prefixes of a history."""
    commits = [
        Commit(None, datetime(2020, 1, 1, 5), None, 5, 5),  # 30 + 0.5
        Commit(None, datetime(2020, 1, 1, 6), None, 2, 3),  # 30.5 + 0.25
        Commit(None, datetime(2020, 1, 1, 7), None, 3, 2),  # 30.75 + 0.25
        Commit(None, datetime(2020, 1, 3, 8), None, 20, 5),  # 31 + 1.25 - 1 = 31.25
    ]
    # Expected weight after the first 1, 2, 3 and 4 commits respectively.
    expected_weights = [30.5, 30.75, 31, 31.25]
    for count, expected in enumerate(expected_weights, start=1):
        assert compute_historical_weight(commits[:count]) == expected
def find(query, components):
    """Run a commit query and build ``Commit`` objects with files and keywords.

    :param query: SQL statement executed on a cursor from ``DB.getConn()``.
        The first column of every row must be the integer commit id, and the
        six columns starting at ``DB.commit._numColumns`` are passed to
        ``Repo.loadFromValues``.
    :param components: parameters bound to ``query``.
    :returns: list of ``Commit`` objects, one per result row, in row order;
        an empty list when the query matches nothing.
    """
    conn = DB.getConn()
    cursor = conn.cursor()
    cursor.execute(query, components)
    commitrows = cursor.fetchall()
    if not commitrows:
        return []

    allcommitids = ",".join([str(int(commit[0])) for commit in commitrows])

    # This is poor practice, but we assured ourselves the value is composed
    # only of ints first (every id went through int() above).
    DB.execute(cursor, "SELECT * from " + DB.commitfile._table + " WHERE commitid IN (" + allcommitids + ")")
    commitfiles = cursor.fetchall()

    DB.execute(cursor, "SELECT * from " + DB.commitkeyword._table + " WHERE commitid IN (" + allcommitids + ")")
    commitkeywords = cursor.fetchall()

    # Group once by commit id instead of rescanning every file/keyword row
    # for every commit; row order within each group is preserved.
    files_by_commit = {}
    for filerow in commitfiles:
        files_by_commit.setdefault(filerow[DB.commitfile.commitid], []).append(filerow[DB.commitfile.file])
    keywords_by_commit = {}
    for keywordrow in commitkeywords:
        keywords_by_commit.setdefault(keywordrow[DB.commitkeyword.commitid], []).append(
            keywordrow[DB.commitkeyword.keyword])

    repo_offset = DB.commit._numColumns
    commits = []
    for row in commitrows:
        r = Repo()
        r.loadFromValues(*(row[repo_offset + k] for k in range(6)))

        commit_id = row[DB.commit.id]
        commit = Commit()
        commit.loadFromDatabase(r, row,
                                files_by_commit.get(commit_id, []),
                                keywords_by_commit.get(commit_id, []))
        commits.append(commit)

    return commits
def parse(filename):
    """
    Parse a file formatted by `git log --numstat`

    Merge commits (those with a ``Merge:`` header line) are skipped, and the
    result is sorted by commit date.

    :param filename: Name of the file to parse
    :returns: A list of Commit objects.
    """
    with open(filename) as f:
        # A leading newline lets the split pattern match the very first commit
        logfile = '\n' + f.read()

    # re.split with one capture group yields
    # [prefix, header1, body1, header2, body2, ...]
    pieces = re.split('\n(commit [0-9a-f]{40})', logfile)
    parsedcommits = []
    for header, body in zip(pieces[1::2], pieces[2::2]):
        commithash = header.split(' ')[1]
        sections = body.split('\n\n')
        authorsection = sections[0].split('\n')
        if any(entry.startswith('Merge:') for entry in authorsection):
            continue  # Skip merge commits

        author = authorsection[1].split('Author: ')[1]

        # Drop the leading "Date:" token and the trailing time zone
        date_tokens = authorsection[2].split(' ')[1:-1]
        date = datetime.strptime(' '.join(date_tokens).strip(), '%a %b %d %H:%M:%S %Y')

        message = sections[1].strip()
        changes = Repo.extractchanges(sections[2].strip().split('\n'))
        parsedcommits.append(Commit(commithash, author, date, message, changes))

    parsedcommits.sort(key=lambda commit: commit.date)
    return parsedcommits
def log(self, tip, sincecommit=None, until=None, since=None, path=None, n=None):
    """Return the commits reachable from ``tip`` as ``Commit`` objects.

    :param tip: SHA or ref name of the newest commit; anything not matching
        ``SHA_MATCHER`` is resolved with ``self.revparse``.
    :param sincecommit: optional SHA or ref name sent as ``oldRefSpec``;
        resolved the same way as ``tip``.
    :param until: accepted but not used by this implementation.
    :param since: not supported; must be ``None``.
    :param path: not supported; must be ``None``.
    :param n: accepted but not used by this implementation.
    :returns: list of ``Commit`` objects in the order the server returns them.
    :raises NotImplementedError: if ``since`` or ``path`` is given.
    """
    if since is not None or path is not None:
        raise NotImplementedError()
    if SHA_MATCHER.match(tip) is None:
        tip = self.revparse(tip)
    if sincecommit and SHA_MATCHER.match(sincecommit) is None:
        # Resolve the old ref itself. Assigning the result to ``tip`` (as the
        # code previously did) clobbered the new ref and left the unresolved
        # name in ``oldRefSpec``.
        sincecommit = self.revparse(sincecommit)
    oldref = "&oldRefSpec=" + sincecommit if sincecommit else ""
    url = self.repo.url + "/commits?newRefSpec=%s%s" % (tip, oldref)
    r = requests.get(url, auth=self.credentials)
    r.raise_for_status()
    return [
        Commit(self.repo, c['sha'], None,
               c.get('parent', geogig.NULL_ID),  # no 'parent' key -> null id
               c['message'],
               c['author']['name'], c['author']['date'],
               c['committer']['name'], c['committer']['date'])
        for c in r.json()['commits']
    ]
def fetch_raw_commits(exclude_repositories=()):
    """
    Yields all commits, with status and perplexity set to null.

    ``exclude_repositories`` may be a single repository name or a sequence
    of names; rows from those repositories are filtered out in SQL.
    """
    sql = r''' SELECT repo, sha, time, message FROM commits_raw '''

    if exclude_repositories:
        # A bare string means "one repository", not a sequence of characters.
        if isinstance(exclude_repositories, str):
            exclude_repositories = (exclude_repositories, )
        marks = ', '.join(repeat('?', len(exclude_repositories)))
        sql = ''' {query} WHERE repo NOT IN ({placeholders}) '''.format(query=sql, placeholders=marks)

    for repo, sha, time, message in conn.execute(sql, exclude_repositories):
        yield Commit(repo=repo, sha=sha, time=time, message=message,
                     status=None, perplexity=None)
def getCommits(numberOfCommits=100,updateCommitCache=False):
    """Return ``Commit`` objects for mozilla/gecko-dev, using the saved cache.

    The GitHub API is queried (and the cache rewritten through
    ``saveCommits``) when ``updateCommitCache`` is true or when
    ``getSavedCommits()`` returns nothing; otherwise the saved JSON is used.

    :param numberOfCommits: value sent as ``per_page`` when querying GitHub.
    :param updateCommitCache: force a refresh even if a cache exists.
    :returns: list of ``Commit`` objects.
    """
    commitsJSON = getSavedCommits()
    refresh = updateCommitCache or not commitsJSON

    if refresh:
        url = ''.join([
            'https://api.github.com/repos/mozilla/gecko-dev/commits',
            "?client_id=", gitClientID,
            "&client_secret=", gitClientSecret,
            "&per_page=", str(numberOfCommits),
        ])
        listing = requests.get(url).json()
        # The listing only carries summaries; fetch the detail of each SHA.
        commitsJSON = [getDetailedCommit(entry['sha']) for entry in listing]

    commits = [Commit(c) for c in commitsJSON]
    if refresh:
        saveCommits(commitsJSON)
    return commits
def parseContent(content):
    """Render git-log style text as an HTML page and save the parsed commits.

    ``content`` is split on ``"\\ncommit "``; the text before the first
    separator is ignored. Within each chunk, line 0 is the commit key,
    line 1 the ``Author:`` line, line 2 the ``Date: `` line, line 4 the
    description (first four characters dropped), and lines 6 onwards are
    file entries classified by the markers ``". M"``, ``". A"`` and ``". D"``.
    Chunks missing any of the header lines are skipped, as are entries
    ending in ``.pdf``.

    :param content: raw log text.
    :returns: the complete HTML document as a string.
    :raises AssertionError: if ``content`` is empty.
    """
    if content == "":
        # Raised explicitly so the check is not stripped under ``python -O``.
        raise AssertionError("File is empty or does not exist")

    out = "<html>\n<head>\n" + style + "<title>Git Commit history</title>\n</head>\n<body style='background-color: #bfbfdf;' class='body'>"
    # The second heading is now closed with </h2>; it used to open a new <h2>.
    out += "<div class='commit' id='top'>\n<h1 class='centered'>VEX Robotics Competition</h1>\n<h2 class='centered'>Team 709S Programming Log / Notebook</h2>\n</div>\n<br>\n"

    lcommits = []
    for chunk in content.split("\ncommit ")[1:]:
        lines = chunk.split("\n")
        try:
            commitkey = lines[0]
            author = lines[1].split("Author:")[1]
            date = lines[2].split("Date: ")[1]
            description = lines[4][4:]
        except IndexError:
            # Header is incomplete or malformed: skip this chunk.
            continue

        filesModified = []
        filesAdded = []
        filesDeleted = []
        for entry in lines[6:]:
            if entry.endswith(".pdf"):
                continue
            # NOTE(review): the file name is taken from column 39 onwards;
            # this presumably matches the width of the raw-diff prefix - confirm.
            if ". M" in entry:
                filesModified.append(entry[39:])
            elif ". A" in entry:
                filesAdded.append(entry[39:])
            elif ". D" in entry:
                filesDeleted.append(entry[39:])

        tcommit = Commit(date, commitkey, author, description, filesModified, filesAdded, filesDeleted)
        lcommits.append(tcommit)
        out += styleContent(tcommit) + "<div class='spacer'></div>"

    saveList(lcommits)
    out += script + "</body>\n</html>\n"
    return out
def push_commit(self, branch, msg=''):
    """Append a commit holding a deep copy of ``branch`` and label it.

    The commit is appended first, then ``generate_hash`` is called until it
    yields a value for which ``has_hash`` is false; that hash and ``msg``
    are stored on the new commit, and its ``branch`` is printed.
    """
    new_commit = Commit(deepcopy(branch))
    self.commits.append(new_commit)

    hash_code = self.generate_hash()
    while self.has_hash(hash_code):
        hash_code = self.generate_hash()

    new_commit.set_hash(hash_code)
    new_commit.set_msg(msg)
    print(new_commit.branch)
def read_commits_data_from_csv(csv_filename=CSV_FILENAME, csv_file_delimiter=CSV_FILE_DELIMITER):
    """Load the module-level ``commits`` list from a CSV file.

    Every row becomes a ``Commit`` built from the ``commitId``, ``project``,
    ``comment`` and ``label`` columns plus the slice ``row[4:]``. After
    loading, ``Commit.prepare_text_vectorizer()`` is called once.

    A file with a header but no data rows now yields an empty list; the
    previous unused ``iloc[0]`` lookup raised ``IndexError`` in that case.

    :param csv_filename: path handed to ``pandas.read_csv``.
    :param csv_file_delimiter: column delimiter handed to ``pandas.read_csv``.
    """
    global commits
    commits = []
    csv_contents = pd.read_csv(csv_filename, delimiter=csv_file_delimiter)
    for i in range(len(csv_contents)):
        data = csv_contents.iloc[i]
        commit = Commit(data["commitId"], data["project"], data["comment"], data["label"], data[4:])
        commits.append(commit)
    Commit.prepare_text_vectorizer()
def process_commands(server):
    """Consume queued commands forever, turning each into a ``Commit``.

    Each command is indexed as ``command[0]`` (payload handed to ``Commit``)
    and ``command[1]`` (address used for replies). A successfully built
    commit is logged with ``log_add`` and queued on ``commits`` together
    with the address; a ``ValueError`` is reported back to the sender via
    ``server.sendto``.

    :param server: object exposing ``sendto(data, address)``.
    """
    while True:
        # Block until a command arrives instead of spinning on qsize(),
        # which kept a CPU core busy while the queue was empty.
        command = commands.get(block=True)
        try:
            commit = Commit(command[0])
            log_add(commit)
            commits.put((commit, command[1]))
        except ValueError as e:
            response = str(e)
            server.sendto(response.encode(), command[1])
def fetch_commits(autogen=False):
    """
    Yields all fully-processed commits.

    Rows are read from the ``commits_autogen`` view when ``autogen`` is
    true, from ``commits`` otherwise.
    """
    view = 'commits_autogen' if autogen else 'commits'
    statement = r''' SELECT repo, sha, time, message, status, perplexity FROM {view} '''.format(view=view)
    for row in conn.execute(statement):
        yield Commit(*row)
def main(argv):
    """Dispatch the ``commit``, ``update`` and ``checkout`` sub-commands.

    ``argv[1]`` selects the sub-command and the remaining items are its
    arguments. ``show_usage()`` is called for a missing or unknown
    sub-command and for a wrong argument count.
    """
    if len(argv) < 2:
        show_usage()

    action = argv[1]
    if action == 'commit':
        if argv[2:] == []:
            print ('Empty')
            show_usage()
        print('commit')
        c = Commit(argv[2:], None)
        c.sendFile(argv[2], argv[3])
    elif action in ('update', 'checkout'):
        # Both sub-commands take exactly three arguments and differ only in
        # the operation label handed to updateOperation.
        if len(argv[2:]) != 3:
            show_usage()
        operation = {'update': "Update", 'checkout': "Checkout"}[action]
        c = Commit(argv[2:], None)
        c.updateOperation(argv[2], operation, argv[3], argv[4])
    else:
        show_usage()
def fetch_commit_by_sha(sha):
    """Return the ``Commit`` stored under ``sha``.

    :raises KeyError: if no row matches (``fetchone()`` returns ``None`` and
        unpacking it raises the ``TypeError`` translated here).
    """
    statement = ''' SELECT repo, sha, time, message, status, perplexity FROM commits WHERE sha = ? '''
    row = conn.execute(statement, (sha, )).fetchone()
    try:
        return Commit(*row)
    except TypeError:
        raise KeyError(sha)
def get_commits(data):
    """Split a list of log lines into ``Commit`` objects.

    Entries are delimited by a line of 72 dashes. Starting at index 0, the
    line after the current position is split on ``' | '`` to form the
    details, and the changes and comment are read by ``get_changes`` /
    ``get_comment`` from two lines further on. Parsing then jumps to the
    next separator line.

    :param data: list of lines.
    :returns: list of ``Commit`` objects in input order; empty when ``data``
        has fewer than two lines.
    """
    sep = 72 * '-'
    commits = []
    index = 0
    while index + 1 < len(data):
        details = data[index + 1].split(' | ')
        changes = get_changes(data, index + 3)
        comment = get_comment(data, index + 3)
        commits.append(Commit(details, changes, comment))
        try:
            index = data.index(sep, index + 1)
        except ValueError:
            # list.index raises instead of returning -1; no further
            # separator means there is nothing left to parse.
            break
    return commits
def process_commands(server):
    """Consume queued commands forever, turning each into a ``Commit``.

    Each command is indexed as ``command[0]`` (payload handed to ``Commit``),
    ``command[1]`` (reply address, or ``None``) and ``command[2]`` (passed
    through unchanged). A successfully built commit is logged with
    ``log_add``, queued on ``commits`` and appended to both
    ``Redes3.grpc_log`` and ``Listen_Udp.upd_log``. On ``ValueError`` the
    message is printed when there is no reply address, otherwise sent back
    through ``server.sendto``.

    :param server: object exposing ``sendto(data, address)``.
    """
    while True:
        command = commands.get(block=True)
        try:
            commit = Commit(command[0])
            log_add(commit)
            commits.put((commit, command[1], command[2]))
            Redes3.grpc_log.append(commit)
            Listen_Udp.upd_log.append(commit)
        except ValueError as e:
            response = str(e)
            # Identity test: None is a singleton, and ``==`` could be
            # overridden by the address object.
            if command[1] is None:
                print(response)
            else:
                server.sendto(response.encode(), command[1])
def fetch_commits_by_repo(repo_name):
    """Yield every raw commit of ``repo_name``.

    Rows come from ``commits_raw``; ``status`` and ``perplexity`` are set
    to ``None`` on each yielded ``Commit``.
    """
    statement = r''' SELECT repo, sha, time, message FROM commits_raw WHERE repo = :repo '''
    for repo, sha, time, message in conn.execute(statement, {'repo': repo_name}):
        yield Commit(repo=repo, sha=sha, time=time, message=message,
                     status=None, perplexity=None)
def write_to_json_commits_per_day(token, day):
    """
    Fetch the facebook/react commits of one day and write them to
    ``commits/<YYYYMMDD>.json``.

    Pages of 100 are requested until an empty or short page is returned.

    Parameters:
        token: authorization token
        day: day of commits we want to retrieve
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    begin = day.replace(hour=0, minute=0, second=0).strftime(timestamp_format)
    end = day.replace(hour=23, minute=59, second=59).strftime(timestamp_format)
    file_output = day.strftime("%Y%m%d")
    url = "https://api.github.com/repos/facebook/react/commits"

    commits_list = []
    page = 1
    while True:
        params = {
            "per_page": "100",
            "page": str(page),
            "since": begin,
            "until": end
        }
        commits = make_request(url, token, params=params)
        print(commits)
        if not commits:
            break
        page += 1

        page_size = 0
        for page_size, commit in enumerate(commits, start=1):
            contributor = get_contributor_from_dict(commit["author"])
            commits_list.append(
                Commit(commit["sha"], commit["commit"]["author"]["date"], contributor))

        # A page shorter than the requested size is the last one.
        if page_size < 100:
            break

    write_to_json("commits/" + file_output + ".json", commits_list)
def getCommits(repo, startdate, enddate):
    """Collect the commits of a git repository inside a date window.

    The repository is kept under ``git-repos/<folder>/`` (created and given
    an ``origin`` remote on first use), then fetched and pulled from
    ``master``. For each commit since ``startdate`` whose ``committed_date``
    is not after ``enddate``, the non-blank inserted/deleted fragments of
    every modified file are gathered and a ``Commit`` is loaded from them.

    :param repo: object exposing ``url``; passed through to
        ``Commit.loadFromSource``.
    :param startdate: lower bound, converted with ``unixToGitDateFormat``.
    :param enddate: upper bound compared against ``committed_date``.
    :returns: list of ``Commit`` objects in iteration order.
    """
    localfolder = urlToFolder(repo.url)
    differ = gdiff.diff_match_patch()
    repoloc = 'git-repos/' + localfolder + '/'
    if os.path.exists(repoloc):
        gitrepo = pygit.Repo(repoloc)
    else:
        os.makedirs(repoloc)
        gitrepo = pygit.Repo.init(repoloc)
        gitrepo.create_remote('origin', repo.url)
    gitrepo.remotes.origin.fetch()
    gitrepo.remotes.origin.pull('master')

    commits = []
    for m in gitrepo.iter_commits(since=unixToGitDateFormat(startdate)):
        if m.committed_date > enddate:
            continue

        alldiffs = []
        # NOTE(review): every commit is diffed against 'HEAD~1', not against
        # its own parent - confirm this is intended.
        for change in m.diff('HEAD~1').iter_change_type('M'):  # Changed
            left = change.a_blob.data_stream.read()
            right = change.b_blob.data_stream.read()
            diffs = differ.diff_main(left, right)
            if diffs:
                differ.diff_cleanupSemantic(diffs)
            for piece in diffs:
                # Keep only real edits (op != 0) with non-blank text.
                if piece[0] != 0 and piece[1].strip():
                    alldiffs.append(piece)

        # Added / deleted / renamed files are not processed yet; the empty
        # loops that ran an extra diff() per commit for them were removed.

        commit = Commit()
        commit.loadFromSource(repo, m.message, m.committed_date, m.stats.files.keys(), m.__str__(), alldiffs)
        commits.append(commit)
    return commits
def get_commits_since_date(repo, commit_date):
    """Return the commits of ``joankilleen/<repo>`` made since ``commit_date``.

    The commit list is fetched first, then one detail request per commit is
    made to read the names of the files it changed. Progress is printed
    along the way.

    :returns: list of ``Commit(sha, filenames)`` objects.
    :raises Exception: if any GitHub request does not return status 200.
    """
    commits_url = f"https://api.github.com/repos/joankilleen/{repo}/commits?since={commit_date}"
    print(commits_url)

    # Get all commits for the date from GitHub
    response = github_calls.get_from_gitgub(url=commits_url)
    if response.status_code != 200:
        raise Exception(
            f"Commits could not be read from GitHub {response.status_code}")

    commits = []
    for summary in response.json():
        sha = summary['sha']

        # Get the details of this commit from GitHub
        commit_detail_url = f"https://api.github.com/repos/joankilleen/{repo}/commits/{sha}"
        detail_response = github_calls.get_from_gitgub(url=commit_detail_url)
        print(f"{detail_response.status_code}")
        if detail_response.status_code != 200:
            raise Exception(
                f"Commit {sha} could not be read from GitHub {detail_response.status_code}"
            )

        # Read the changed files in the commit
        filenames = []
        for changed in detail_response.json()['files']:
            print(changed['filename'])
            filenames.append(changed['filename'])

        # Create a Commit object
        commit = Commit(sha, filenames)
        print(commit.sha)
        commits.append(commit)

    print(len(commits))
    return commits
def getCommits(repo, startdate, enddate):
    """Collect the Subversion log entries of ``repo`` between two dates.

    The log is requested from ``startdate`` to ``enddate`` with changed
    paths, reversed, and each entry is loaded into a ``Commit`` with its
    message, ``svn:date``, changed paths and revision number (and an empty
    diff list).

    :param repo: object exposing ``url``; passed through to
        ``Commit.loadFromSource``.
    :returns: list of ``Commit`` objects in reversed log order.
    """
    end_rev = pysvn.Revision(pysvn.opt_revision_kind.date, enddate)
    start_rev = pysvn.Revision(pysvn.opt_revision_kind.date, startdate)

    client = pysvn.Client()
    entries = client.log(repo.url, revision_start=start_rev, revision_end=end_rev,
                         discover_changed_paths=True)
    entries.reverse()

    commits = []
    for entry in entries:
        date = entry.data['revprops']['svn:date']
        message = entry.data['message']
        paths = [changed.path for changed in entry.data['changed_paths']]

        commit = Commit()
        commit.loadFromSource(repo, message, date, paths, entry.data['revision'].number, [])
        commits.append(commit)
    return commits
def from_string(cls, repo, line):
    """
    Build a Tag from one line of formatted tag information.

    ``repo``
        is the Repo

    ``line``
        is the full tag name and the commit id, separated by a null byte
        (name: [a-zA-Z_/]+, id: [0-9A-Fa-f]{40}). Only the part of the
        name after the last ``/`` is kept.

    Returns
        ``git.Tag``
    """
    full_name, ids = line.split("\x00")
    short_name = full_name.rpartition("/")[2]
    return Tag(short_name, Commit(repo, id=ids))
def commitFromString(self, lines):
    """Build a ``Commit`` from the header lines of a serialized commit.

    Recognised headers (first space-separated token): ``commit``, ``tree``,
    ``parent``, ``author``, ``committer`` and ``message``. After a
    ``message`` header, every line starting with a tab or a space is
    stripped and appended to the message; the first line that does not ends
    message mode and is itself discarded.

    Headers that never appear are passed to ``Commit`` as ``None``; they
    used to be left unbound and raise ``UnboundLocalError``.

    :param lines: iterable of text lines.
    :returns: a ``Commit`` bound to ``self.repo``, or ``None`` when no
        ``commit`` header was seen.
    """
    in_message = False
    messagetext = []
    commitid = tree = parents = None
    author = authordate = committer = committerdate = None

    for line in lines:
        tokens = line.split(' ')
        if in_message:
            if line.startswith(("\t", " ")):
                messagetext.append(line.strip())
            else:
                in_message = False
            continue

        field = tokens[0]
        if field == 'commit':
            commitid = tokens[1]
        elif field == 'tree':
            tree = tokens[1]
        elif field == 'parent':
            if len(tokens) > 1:
                parents = [t for t in tokens[1:] if t != ""]
        elif field == 'author':
            # The last three tokens are excluded from the name; the final two
            # are combined into the timestamp.
            author = " ".join(tokens[1:-3])
            # NOTE(review): the // 1000 suggests millisecond values - confirm.
            authordate = datetime.datetime.fromtimestamp(
                (int(tokens[-2]) - int(tokens[-1])) // 1000)
        elif field == 'committer':
            committer = tokens[1]
            committerdate = datetime.datetime.fromtimestamp(
                (int(tokens[-2]) - int(tokens[-1])) // 1000)
        elif field == 'message':
            in_message = True

    if commitid is None:
        return None
    return Commit(self.repo, commitid, tree, parents, "\n".join(messagetext),
                  author, authordate, committer, committerdate)
def generateCommitsFunction(nCommits: int):
    """Generates mock commit objects array

    Returns a zero-argument function that gives back a list of ``nCommits``
    ``Commit`` objects whose committer dates start at 2021-01-17T09:11:55Z
    and advance by 30 seconds each.
    """
    DATE_FORMAT = "%Y-%m-%dT%XZ"
    first_date = datetime.strptime('2021-01-17T09:11:55Z', DATE_FORMAT)

    commits = []
    for i in range(nCommits):
        date_string = (first_date + timedelta(seconds=(i * 30))).strftime(DATE_FORMAT)
        commits.append(Commit({"commit": {"committer": {"date": date_string}}}))

    def get_commits():
        return commits

    return get_commits
def fetch_commit(repo=None, sha=None):
    """
    Fetch a single commit by its repository name and SHA, or simply its SHA.

    Raises ValueError when neither argument is given and KeyError when no
    matching row exists.
    """
    if not (repo or sha):
        raise ValueError('Must provide either repo or sha')

    if not repo:
        return fetch_commit_by_sha(sha)

    statement = ''' SELECT repo, sha, time, message, status, perplexity FROM commits WHERE repo = ? AND sha = ? '''
    row = conn.execute(statement, (repo, sha)).fetchone()
    try:
        return Commit(*row)
    except TypeError:
        # fetchone() returned None: nothing matched.
        raise KeyError(sha)
workspace = Workspace(os.getcwd()) database = Database(db_path) entries = list() for name in workspace.list_files(): data = workspace.read_file(name) blob = Blob(data) database.store(blob) entries.append(Entry(name, blob.oid)) tree = Tree(entries) database.store(tree) name = os.environ['GIT_AUTHOR_NAME'] email = os.environ['GIT_AUTHOR_EMAIL'] author = Author(name, email, time()) message = sys.stdin.read() commit = Commit(tree.oid, author, message) database.store(commit) with open(os.path.join(git_path, 'HEAD'), 'w+') as file: file.write(commit.oid) print(f'[(root-commit) {commit.oid}] {message.splitlines()[0]}') sys.exit(0) else: print(f'git: {command!r} is not a git command.', file=sys.stderr) sys.exit(1)
# Group the valid commits of the CSV file by repository, then pickle the
# resulting mapping to commits.pickle.
repos = defaultdict(list)

with open(filename, encoding='UTF-8') as csv_file:
    for row in csv.reader(csv_file, quoting=csv.QUOTE_ALL):
        try:
            repo, sha, time_str, message_raw, status = row
        except ValueError as e:
            # Unexpected column count: show the offending row, then fail.
            from pprint import pprint as print
            print(row)
            raise e

        # The divisor converts the stored integer to whole seconds.
        time = datetime.fromtimestamp(int(time_str) // 10**6)
        message = unescape_message(message_raw)
        commit = Commit(repo=repo, sha=sha, time=time, message=message,
                        tokens=tokenize(message), status=status)
        if commit.is_valid:
            repos[commit.repo].append(commit)

with open('commits.pickle', 'wb') as pickle_file:
    pickle.dump(repos, pickle_file)
def blame(cls, repo, commit, file):
    """
    The blame information for the given file at the given commit

    The output of ``repo.git.blame(commit, '--', file, p=True)`` is parsed
    line by line: a 40-hex header with three numbers opens a new group, the
    ``author*`` / ``committer*`` / ``summary`` / ``filename`` lines fill in
    the commit details, and a line starting with whitespace (expected to be
    a tab followed by the text) is a blamed source line.

    Returns
        list: [git.Commit, list: [<line>]]
        A list of two-element lists associating a Commit object with the
        lines of one group. Groups are given in order of appearance, and a
        commit id seen more than once reuses the same Commit object.
    """
    data = repo.git.blame(commit, '--', file, p=True)
    commits = {}
    blames = []
    info = None

    for line in data.splitlines():
        parts = re.split(r'\s+', line, 1)
        key = parts[0]
        if re.search(r'^[0-9A-Fa-f]{40}$', key):
            m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line)
            if m:
                # Header with a group size: first line of a new group.
                info = {'id': m.group(1)}
                blames.append([None, []])
            else:
                m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line)
                if m:
                    info = {'id': m.group(1)}
        elif re.search(r'^(author|committer)', key):
            m = re.search(r'^(.+)-mail$', key)
            if m:
                info["%s_email" % m.group(1)] = parts[-1]
            else:
                m = re.search(r'^(.+)-time$', key)
                if m:
                    info["%s_date" % m.group(1)] = time.gmtime(int(parts[-1]))
                elif re.search(r'^(author|committer)$', key):
                    info[key] = parts[-1]
        elif re.search(r'^filename', key):
            info['filename'] = parts[-1]
        elif re.search(r'^summary', key):
            info['summary'] = parts[-1]
        elif key == '':
            if info:
                # dict.get replaces dict.has_key, which Python 3 removed.
                c = commits.get(info['id'])
                if not c:
                    c = Commit(
                        repo,
                        id=info['id'],
                        author=Actor.from_string(info['author'] + ' ' + info['author_email']),
                        authored_date=info['author_date'],
                        committer=Actor.from_string(
                            info['committer'] + ' ' + info['committer_email']),
                        committed_date=info['committer_date'],
                        message=info['summary'])
                    commits[info['id']] = c

                m = re.search(r'^\t(.*)$', line)
                text, = m.groups()
                blames[-1][0] = c
                blames[-1][1].append(text)
                info = None

    return blames
def get_commits(self) -> List[Commit]:
    """Fetch the commit list and wrap every entry in a ``Commit``.

    The ``{/sha}`` placeholder is removed from the stored commits URL
    before the request, and each element of the JSON response is passed to
    ``Commit(data=...)``.
    """
    session = self.__session
    commits_url = self.__commits_url.replace("{/sha}", "")
    response = session.get(url=commits_url)
    return [Commit(data=data) for data in response.json()]