Ejemplo n.º 1
    def test_fetch_since_date(self):
        """Test whether commits are fetched from a Git repository since the given date.

        The check runs twice: once with a naive ``from_date`` and once with
        a timezone-aware one. Both runs are expected to return the same
        three commits.
        """

        new_path = os.path.join(self.tmp_path, 'newgit')

        expected = [('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
                    ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
                    ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]

        def check_commits_since(from_date):
            # Fetch with the given lower bound and compare every item
            # against the expected (hash, timestamp) pairs.
            git = Git(self.git_path, new_path)
            commits = list(git.fetch(from_date=from_date))

            self.assertEqual(len(commits), len(expected))

            for commit, (sha, updated_on) in zip(commits, expected):
                expected_uuid = uuid(self.git_path, sha)
                self.assertEqual(commit['data']['commit'], sha)
                self.assertEqual(commit['origin'], self.git_path)
                self.assertEqual(commit['uuid'], expected_uuid)
                self.assertEqual(commit['updated_on'], updated_on)
                self.assertEqual(commit['category'], 'commit')
                self.assertEqual(commit['tag'], self.git_path)

        check_commits_since(datetime.datetime(2014, 2, 11, 22, 7, 49))

        # Test it using a datetime that includes the timezone
        # NOTE(review): the original statement was truncated; only the year
        # (2012) and the -36000 s offset survived. The remaining date parts
        # are reconstructed -- confirm against the test repository fixture.
        minus_ten_hours = datetime.timezone(datetime.timedelta(seconds=-36000))
        check_commits_since(datetime.datetime(2012, 8, 14, 14, 30, 0,
                                              tzinfo=minus_ten_hours))

Ejemplo n.º 2
    def test_fetch(self):
        """Test whether every commit of a Git repository is fetched, in order"""

        clone_path = os.path.join(self.tmp_path, 'newgit')

        backend = Git(self.git_path, clone_path)
        fetched = list(backend.fetch())

        # (commit hash, 'updated_on' timestamp) pairs in the expected order
        expected = [('bc57a9209f096a130dcc5ba7089a8663f758a703', 1344965413.0),
                    ('87783129c3f00d2c81a3a8e585eb86a47e39891a', 1344965535.0),
                    ('7debcf8a2f57f86663809c58b5c07a398be7674c', 1344965607.0),
                    ('c0d66f92a95e31c77be08dc9d0f11a16715d1885', 1344965702.0),
                    ('c6ba8f7a1058db3e6b4bc6f1090e932b107605fb', 1344966351.0),
                    ('589bb080f059834829a2a5955bebfd7c2baa110a', 1344967441.0),
                    ('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
                    ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
                    ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]

        self.assertEqual(len(fetched), len(expected))

        # Lengths are equal at this point, so zip() visits every item.
        for item, (sha, timestamp) in zip(fetched, expected):
            item_uuid = uuid(self.git_path, sha)
            self.assertEqual(item['data']['commit'], sha)
            self.assertEqual(item['origin'], self.git_path)
            self.assertEqual(item['uuid'], item_uuid)
            self.assertEqual(item['updated_on'], timestamp)
            self.assertEqual(item['category'], 'commit')
            self.assertEqual(item['tag'], self.git_path)

def commit_counter(own, repo_url, d1, df, k):
    """Count the recent commits of a GitHub repository and record the total.

    Args:
        own: GitHub owner (user or organisation) of the repository.
        repo_url: repository name; despite the parameter name it is the
            bare name, and the full clone URL is built from it here.
        d1: reference value forwarded to ``isrecent`` together with each
            commit's day, month number and year.
        df: table-like object; the total is stored at
            ``df['Number of commits'][k]``.
        k: key of the entry of ``df`` to update.

    Returns:
        The number of commits for which ``isrecent`` returned a true value.

    Raises:
        KeyError: if a commit date carries a month abbreviation that is
            not one of the twelve English ones.
    """
    print(own, "\t", repo_url)
    # url for the git repo to analyze
    repo_url = 'https://github.com/' + own + '/' + repo_url + '.git'
    # directory for letting Perceval clone the git repo
    # NOTE(review): this embeds the full URL in the path
    # ('/tmp/https://github.com/...'); kept as-is to preserve behaviour.
    repo_dir = '/tmp/' + repo_url + '.git'

    # create a Git object, pointing to repo_url, using repo_dir for cloning
    repo = Git(uri=repo_url, gitpath=repo_dir)

    mon = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
        "Nov", "Dec"
    ]
    # One lookup per commit instead of scanning the list; an unknown month
    # now fails loudly instead of silently reusing the previous commit's.
    month_numbers = {name: number for number, name in enumerate(mon, start=1)}

    count = 0
    # fetch all commits as an iterator and count the recent ones
    for commit in repo.fetch():
        # The code indexes the space-separated date as:
        # [1] = month abbreviation, [2] = day, [4] = year.
        temp = commit['data']['CommitDate'].split(" ")
        day = int(temp[2])
        month_num = month_numbers[temp[1]]
        year = int(temp[4])
        if isrecent(d1, day, month_num, year):
            count += 1
    print("Number of commmits: ", count)
    df['Number of commits'][k] = count
    return count
Ejemplo n.º 4
 def list_committers(self, url, directory):
     """Return the author of every commit in a Git repository.

     The list has one entry per commit, in fetch order, so repeated
     authors are kept on purpose: callers can count contributions per
     author (e.g. by gender).

     Args:
         url: URI of the Git repository.
         directory: local path the Git backend uses for the repository.

     Returns:
         A list with the result of ``self.removeMail`` applied to each
         commit's ``Author`` field.
     """
     repo = Git(uri=url, gitpath=directory)
     # The original loop computed each committer but never stored it, so
     # the method always returned an empty list.
     return [self.removeMail(user['data']['Author']) for user in repo.fetch()]
Ejemplo n.º 5
    def test_fetch_empty_log(self):
        """Test whether an empty log is parsed into an empty list of commits"""

        clone_path = os.path.join(self.tmp_path, 'newgit')
        start = datetime.datetime(2020, 1, 1, 1, 1, 1)

        backend = Git(self.git_path, clone_path)
        fetched = list(backend.fetch(from_date=start))

        self.assertListEqual(fetched, [])
Ejemplo n.º 6
    def test_fetch_from_empty_repository(self):
        """Test whether fetching from an empty repository yields no commits"""

        clone_path = os.path.join(self.tmp_path, 'newgit')

        backend = Git(self.git_empty_path, clone_path)
        fetched = list(backend.fetch())

        self.assertListEqual(fetched, [])

Ejemplo n.º 7
def run(args):
    """Estimate and print the development effort of a Git repository.

    Args:
        args: mapping with the keys ``'git_repository'`` (repository URL),
            ``'period'`` (period length, printed as months) and
            ``'threshold'`` (printed as commits in a period).

    The function prints its report to stdout and returns ``None``.
    """
    # Local imports keep this block self-contained.
    import os
    import tempfile

    repo_url = args['git_repository']
    period_length = args['period']
    threshold = args['threshold']
    active_days = True

    # Directory for letting Perceval clone the git repo. The system
    # temporary directory is used instead of a hard-coded '/tmp/', and a
    # trailing '/' in the URL no longer yields an empty repository name.
    repo_name = repo_url.rstrip('/').split('/')[-1]
    repo_dir = os.path.join(tempfile.gettempdir(), repo_name + '.git')

    first_commit = datetime.now(timezone.utc)
    authorDict = defaultdict(list)

    repo = Git(uri=repo_url, gitpath=repo_dir)

    # Track the earliest author date seen across all commits.
    for commit in repo.fetch():
        commitdate = datetime.strptime(commit['data']['AuthorDate'], '%a %b %d %H:%M:%S %Y %z')
        if commitdate < first_commit:
            first_commit = commitdate
    # NOTE(review): nothing in this function adds entries to authorDict,
    # so both counts below are 0 unless code is missing here -- confirm.
    logging.info("Authors found: %s", len(authorDict))

    logging.info("Authors after merge: %s", len(authorDict))
    author_count = author_counting(authorDict, period_length, active_days)
    (effort_periods, full_time_periods, non_full_time_periods) = project_period_effort(author_count, threshold, period_length)
    maxeffort_periods = project_period_maxeffort(author_count, period_length)

    # Printing results
    print("  Length of period (in months):", period_length)
    print("  Threshold t (in commits in a period):", threshold)
    print("  First commit date:", first_commit, "--", round((datetime.now(timezone.utc)-first_commit).days/30, 2) , "months ago")
    print("  Maximum possible development effort (in person-months):", sum(maxeffort_periods.values()))
    print(pretty_print_period(period_length, first_commit, ["FT", "Non-FT", "Effort"], full_time_periods, non_full_time_periods, effort_periods))
    print(" " * 8, "FT: Full-time developers")
    print("  ---> Estimated development effort (in person-months):", round(sum(effort_periods.values()), 2))
    print("For more information, visit http://github.com/gregoriorobles/git2effort")
Ejemplo n.º 8
def get_commits(username, reponame, commits, config):
    """Fetch the commits of a GitHub repository and collect their ``data``.

    TODO: Implement recursion argument, default to False.

    Args:
        username (str): GitHub owner of the repository.
        reponame (str): name of the repository.
        commits (list): list that receives the ``data`` dictionary of each
            fetched commit (mutated in place).
        config: mapping; ``config["data_dir"]`` is the base directory
            under which the ``grimoire_dumps`` folder is created.

    NOTE(review): several lines of the original were missing. The
    ``try:``, the directory creation and the loop body are reconstructed
    from the surrounding comments -- confirm against the real source.
    """
    repo_URL = 'https://github.com/' + username + '/' + reponame

    # Make sure the export directory exists.
    # TODO: this snippet is used three times; make a function out of it.
    local_dir = os.path.join(config["data_dir"], 'grimoire_dumps')
    os.makedirs(local_dir, exist_ok=True)
    data_dump_path = os.path.join(local_dir, username + '-' + reponame)

    git = Git(repo_URL, data_dump_path)
    # `fetch()` gets commits from all branches by default.
    # It yields dictionaries whose `data` key holds the actual metadata of
    # each commit; the other keys describe the perceval `fetch()` operation.
    try:
        repo_fetched = list(git.fetch())

        # issue 33 (very ugly) band aid: delete *.pack files once downloaded by perceval
        shutil.rmtree(os.path.join(data_dump_path, 'objects', 'pack'), ignore_errors=True)
        # Keep just commit `data`
        for commit_data in repo_fetched:
            commits.append(commit_data['data'])
    except RepositoryError as repo_error:
        # logging.warning() does not accept a `file` keyword; the original
        # call would raise TypeError here instead of logging the problem.
        logging.warning("Error with this repository: %s/%s (%s)",
                        username, reponame, repo_error)
def getAllGitCommits(repo_url,repo_dir ):
    """Return every item fetched from the Git repository as a list.

    ``repo_url`` is the repository URI and ``repo_dir`` the local path the
    Git backend works in. No date filter is applied.
    """
    backend = Git(uri=repo_url, gitpath=repo_dir)
    return list(backend.fetch())
Ejemplo n.º 10
def analyze_git(es_write):
    """Index the Git history of one project into Elasticsearch.

    The index named by ``INDEX`` is deleted and recreated with
    ``MAPPING_GIT``. Fetched items are then accumulated, and every 15000
    items the batch is turned into events, enriched and bulk-uploaded
    through ``helpers.bulk``.

    Args:
        es_write: Elasticsearch client used for index management and
            bulk uploads.

    NOTE(review): the original block had lost several lines: the second
    argument of ``Git(...)``, the end of the ``header`` dict,
    ``docs.append(header)`` and ``commits.append(item)``. They are
    reconstructed here -- confirm against the real source.
    """

    #INDEX = 'git_gecko'
    #PROJECT = 'gecko'
    #git = Git("https://github.com/mozilla/gecko-dev.git", "../gecko_all_commits_final_version_no_cm_options_nobrowser_nochrome_notoolkit.log")

    #INDEX = 'git_webkit'
    #PROJECT = 'webkit'
    #git = Git("https://github.com/WebKit/webkit.git", "../webkit_final_log_no_mc_options.log")

    INDEX = "git_blink"
    PROJECT = "blink"
    git = Git("https://chromium.googlesource.com/chromium",
              "../blink_final_log_no_cm_options.log")

    commits = []
    cont = 1
    uniq_id = 1
    docs = []

    es_write.indices.delete(INDEX, ignore=[400, 404])
    es_write.indices.create(INDEX, body=MAPPING_GIT)

    for item in git.fetch():
        # Accumulate items until a full batch is available.
        commits.append(item)

        if cont % 15000 == 0:
            git_events = events.Git(commits)
            events_df = git_events.eventize(1)

            # Add flags if found
            message_log = MessageLogFlag(events_df)
            events_df = message_log.enrich('message')

            splitemail = SplitEmail(events_df)
            events_df = splitemail.enrich("owner")

            # Code for webkit
            # If there's a bot committing code, then we need to use the values flag
            if PROJECT == 'webkit':
                ## Fix values in the owner column
                events_df.loc[events_df["email"] == '*****@*****.**',
                              "owner"] = events_df["values"]
                # Re-do this analysis to calculate the right email and user
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")

            # Code for Blink
            # If there's a flag, then we need to update the owner
            if PROJECT == 'blink':
                # `^ True` negates the mask: rows whose "values" is non-empty
                events_df.loc[(events_df["values"] == '') ^ True,
                              "owner"] = events_df["values"]
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")

            splitdomain = SplitEmailDomain(events_df)
            events_df = splitdomain.enrich("email")
            #events_df.drop("message", axis=1, inplace=True)

            # Add project information
            events_df["project"] = PROJECT

            test = events_df.to_dict("index")

            docs = []
            for i in test.keys():
                header = {
                    "_index": INDEX,
                    "_type": "item",
                    "_id": int(uniq_id),
                    "_source": test[i]
                }
                docs.append(header)
                uniq_id = uniq_id + 1

            helpers.bulk(es_write, docs)

            commits = []
        cont = cont + 1

    # NOTE(review): this re-sends the last batch's documents; items fetched
    # after the last multiple of 15000 are never eventized. Kept as in the
    # original -- confirm whether a final flush of `commits` is intended.
    helpers.bulk(es_write, docs)
Ejemplo n.º 11
with open(out_file) as str_data:
    json_data = json.load(str_data)

# Getting data via Perceval in Python: fetch every commit of a repository,
# save the items as pretty-printed JSON and load them back.

url = 'https://github.com/chaoss/grimoirelab-toolkit'
local_path = './chaoss-grimoirelab-toolkit'
output_file = './grimoirelab-git.json'

git = Git(url, local_path)

commits = list(git.fetch())

dumped = json.dumps(commits, sort_keys=True, indent=4)

# save the Perceval docs to a file
# (the original `with` block had no body, so nothing was ever written)
with open(output_file, 'w') as f:
    f.write(dumped)

# load the Perceval docs from a file
with open(output_file, 'r') as f:
    content = f.read()
    commits = json.loads(content)

for c in commits:
Ejemplo n.º 12
#! /usr/bin/env python3
from flake8.api import legacy as flake8
from graal.graal import GraalRepository
from perceval.backends.core.git import Git
import random

# Interactive inputs: repository URL, local path for the repository and
# the worktree path that is later stored in `files`.
repo_url = input("Enter url: ")
repo_dir = input("Enter dir: ")
worktree_path = input("Enter worktree path: ")
# Git object, pointing to repo_url and repo_dir for cloning
ggit = Git(uri=repo_url , gitpath=repo_dir)

# clone the repository (if it doesn't exist locally)
# NOTE(review): no explicit clone call is visible in this snippet.

# Materialise every fetched item so that one can be picked at random.
commits = list(ggit.fetch())
# hash of random commit
# (random.choice raises IndexError if no commits were fetched)
commit = random.choice(commits)
_hash = commit['data']['commit']
# or input the hash of certain commit
# _hash = input("Enter hash: ")

gral_repo = GraalRepository(uri=repo_url, dirpath=repo_dir)

# checkout the commit
# NOTE(review): the checkout call itself is not present in this snippet;
# `_hash` and `gral_repo` are not used below.
style_guide = flake8.get_style_guide()
files = worktree_path
# generate report by flake8
# NOTE(review): the snippet ends here; the flake8 check call is missing.
Ejemplo n.º 13
 def numCommits(self, url, directory):
     """Return how many items the Git backend fetches for the repository.

     ``url`` is the repository URI and ``directory`` the local path the
     backend works in.
     """
     backend = Git(uri=url, gitpath=directory)
     # Count lazily; the fetched items themselves are not needed.
     return sum(1 for _ in backend.fetch())
Ejemplo n.º 14
def main(args):
    """Fetch the Git commits of the GitHub repositories listed in a file and export them as JSON.

    Reads ``args.github_token``, ``args.output_path``, ``args.urls_file``,
    ``args.perceval_path`` and ``args.cache_mode_on``.

    NOTE(review): this snippet is damaged. Several ``try:`` / ``else:``
    lines and some branch bodies are missing, and the function is cut off
    after its last ``if``. The comments below describe only what is visible.
    """
    github_key = args.github_token
    # JSON files already present in the output directory.
    list_jsons = os.listdir(os.path.abspath(args.output_path))
    repo_set = set()
    with open(args.urls_file, 'r') as url_file:
        for line in url_file:
            # NOTE(review): presumably blank lines were skipped here; the
            # branch body and the following `try:` are missing.
            if line in ['\n', '\r\n']:
                url = line.split('/')
                if not url:
                # "owner/name" built from the 4th and 5th '/'-separated parts.
                repo = "%s/%s" % (url[3], url[4])
            except IndexError:
                print("url:" + line)
                logger.error("Error in repo (line) " + line + "\r\n")
    # NOTE(review): nothing visible adds entries to repo_set.


    for repo in sorted(repo_set):

        repo_split = repo.split('/')
        outfile_name = "%s_%s.json" % (repo_split[0], repo_split[1])
        outfile_path = "%s/%s" % (args.output_path, outfile_name)

        # NOTE(review): both checks only log; a `continue` is presumably
        # missing after each of them.
        if outfile_name in list_jsons:
            logger.info("Already downloaded: %s " % outfile_name)
        if "framework" in outfile_name:
            logger.info("Skipping <framework> repository")

        api_url = "https://api.github.com/repos/" + str(repo) + "?access_token=" + github_key
        # NOTE(review): api_url contains the access token, so this line
        # writes the secret to the log.
        logger.info("Checking metadata for repo %s" % api_url)
            response = urllib.request.urlopen(api_url)
        except urllib.error.HTTPError:
            logger.error("HTTP 404: Not found: %s" % repo)

            json_data = response.read().decode('utf-8')
            dicc_out = json.loads(json_data)
        except ValueError:
            logger.warning("Error in response (ValueError)")

        # Derive `result` from the API answer.
        # NOTE(review): an `else:` before the last assignment appears to
        # be missing.
        if 'message' in dicc_out:
            result = dicc_out['message']
        elif dicc_out == {}:
            result = 'False'
            result = dicc_out['private']

        # NOTE(review): `result` is compared with the string 'True'; whether
        # dicc_out['private'] is a string or a bool is not visible here.
        if result == 'Not Found':
            logger.error("Not found: %s" % repo)
        elif result == 'True':
            logger.error("Private: %s" % repo)
            repo_url = "https://github.com/%s" % repo + ".git"

            logger.info('Executing Perceval with repo: %s' % repo)
            logger.debug('Repo stats. Size: %s KB' % dicc_out["size"])
            gitpath = '%s/%s' % (os.path.abspath(args.perceval_path), repo)
            git = Git(uri=repo_url, gitpath=gitpath)
                commits = [commit for commit in git.fetch()]
            except Exception as e:
                logger.warning("Failure while fetching commits. Repo: %s" % repo)
            logger.info('Exporting results to JSON...')
            with open(outfile_path, "w", encoding='utf-8') as jfile:
                json.dump(commits, jfile, indent=4, sort_keys=True)
            logger.info('Exported to %s' % outfile_path)
            # NOTE(review): the body of this `if` is cut off.
            if not args.cache_mode_on:
Ejemplo n.º 15
# For every repository in `resp`, count the commits, issues and pull
# requests dated within the last 90 days.
repo_list = []

# Taken once so that every item is compared against the same instant.
# NOTE(review): `now` is naive local time; the commit dates below have
# their UTC offset cut off before parsing, so the 90-day boundary can be
# off by up to a day.
now = datetime.now()

for repo in resp:
    repo_name = repo['name']
    repo_url = repo['html_url']  # github repository url
    repo_dir = '/tmp/' + repo_name

    print("Started " + repo_name)
    commit_count = 0
    issue_count = 0
    pull_count = 0
    git_commit = Git(uri=repo_url, gitpath=repo_dir)

    # count no of commits
    for commit in git_commit.fetch():
        # [:-6] drops the trailing " +HHMM" offset so the format matches.
        date_diff = now - datetime.strptime(
            commit['data']['CommitDate'][:-6], "%a %b %d %H:%M:%S %Y")
        if date_diff.days <= 90:
            commit_count += 1

    items = GitHub(owner=parent, repository=repo_name, api_token=github_token)
    # count no of pull_requests, issues
    for item in items.fetch():
        date_diff = now - datetime.strptime(
            item['data']['created_at'], "%Y-%m-%dT%H:%M:%SZ")
        if date_diff.days <= 90:
            # Items carrying a 'pull_request' key are pull requests; all
            # others are issues. The `else:` was missing, so issues were
            # counted only when the item was a pull request.
            if 'pull_request' in item['data']:
                pull_count += 1
            else:
                issue_count += 1
Ejemplo n.º 16
def git_repos():
    """Summarise how many commits each author has across the amfoss repositories.

    Every repository in the list is fetched through the Git backend and
    the ``Author`` field of each commit is collected.

    Returns:
        A list of strings of the form ``"Commits:<n>\\t User: <author>\\n"``,
        one per distinct author, in order of first appearance.

    NOTE(review): the original copy-pasted stanzas had lost the
    ``users.append(`` line of every loop, and the final string
    concatenation was garbled; both are reconstructed here.
    """
    # Repositories under https://github.com/amfoss/, in the original order.
    repo_names = (
        'vidyaratna',
        'cms',
        'TempleApp',
        'website',
        'WebApp',
        'cms-mobile',
        'Praveshan',
        'bot',
        'tasks',
        'star-me',
        'amdec-website',
        'Wiki',
        'GitLit',
        'Qujini',
        'attendance-tracker',
        'events',
        'Hack4Amrita',
        'master-syllabus',
        'test-repo',
        'webspace',
        'internal-hackathon',
        'foss-meetups',
        'automated-scripts',
        'fosswebsite',
        'fosster',
        'Foss-talks',
        'cybergurukulam',
        'kdeconf',
        'android-workshop-summer-2018',
        'App',
        'Workshops',
        'Wikimedia_Hackathon_Amrita_University',
        'website_old',
    )

    # NOTE(review): every repository uses the same target directory, as in
    # the original. Confirm that the backend copes with one gitpath being
    # reused for different URIs; a per-repository path may be needed.
    repo_dir = '/perceval.tests'  # target directory

    users = []
    for name in repo_names:
        repo_url = 'https://github.com/amfoss/' + name + '.git'
        repo = Git(uri=repo_url, gitpath=repo_dir)
        # collect the author of every commit in the repository
        users.extend(commit['data']['Author'] for commit in repo.fetch())

    # Build the counter once instead of once per author.
    commits_per_user = Counter(users)
    return [
        "Commits:" + str(total) + "\t User: " + str(author) + "\n"
        for author, total in commits_per_user.items()
    ]
Ejemplo n.º 17
    help="Date that you want to fetch information from in format YYYYMMDD")
    help="Date that you want to fetch information till in format YYYYMMDD")
args = parser.parse_args()

# Owner and repository names
(owner, repo) = args.repo.split('/')
repo_git_uri = "http://github.com/{}/{}.git".format(owner, repo)
repo_dir = 'tmp/perceval'

# Convert the YYYYMMDD from/to arguments to datetime objects. strptime
# validates the format and reports a clear error for malformed input,
# instead of slicing the string by hand.
fr_dt = datetime.datetime.strptime(args.fromdate, "%Y%m%d")
to_dt = datetime.datetime.strptime(args.todate, "%Y%m%d")

git_obj = Git(uri=repo_git_uri, gitpath=repo_dir)
github_obj = GitHub(owner=owner, repository=repo, api_token=args.token)

# Big dicts printed, can be pretty printed for convenience
# (the Git fetch is not date-filtered; only the GitHub fetch is)
for commit in git_obj.fetch():
    print(commit, '\n')

for item in github_obj.fetch(from_date=fr_dt, to_date=to_dt):
    print(item, '\n')
Ejemplo n.º 18
import json
import datetime
import dateutil

# Command-line tool: fetch the commits of a Git repository within a fixed
# date window, then dump them to JSON or print them field by field.
parser = argparse.ArgumentParser(description="Simple parser for Git commits.")
parser.add_argument("-r", "--repo",
                    help = "Git repository URI")
parser.add_argument("-p", "--gitpath",
                    help = "Gitpath of the repository")
parser.add_argument("-d", "--create_dump",
                    help = "y for for creating json dump of data, empty for printing to terminal")

args = parser.parse_args()

# The standard-library UTC tzinfo is used: a bare `import dateutil` does
# not guarantee that the `dateutil.tz` submodule is loaded.
from_date = datetime.datetime(2019, 6, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)
to_date = datetime.datetime(2020, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)

# make sure a cloned repo folder is deleted in gitpath if already present
repo = Git(uri=args.repo, gitpath=args.gitpath)

commits_gen = repo.fetch(from_date=from_date, to_date=to_date)
commits = list(commits_gen)

if args.create_dump == 'y':
    with open("git_commits.json", "w") as file:
        json.dump(commits, file)
else:
    # Per the --create_dump help text, printing is the alternative to the
    # dump. The `else:` was missing, so the dump branch also printed
    # everything and the default branch printed nothing.
    for commit in commits:
        for field in commit.keys():
            print(str(field) + ':' + str(commit[field]))
Ejemplo n.º 19
from perceval.backends.core.git import GitCommand
from datetime import datetime

# Set up the Git argument parser provided by the Perceval Git backend
parser = GitCommand.setup_cmd_parser()

# Build the argument list: repository URI followed by the '--git-path' option.
# NOTE(review): the list literal below is never closed in this excerpt -- the
# value for '--git-path' and the closing bracket appear to be missing.
arg = [
    'https://github.com/sumitskj/Prajawalan2019.git', '--git-path',
args = parser.parse(*arg)

# Create the Git object from the parsed arguments.
# NOTE(review): `Git` is used here but only `GitCommand` is imported in the
# visible part of this example -- confirm the import exists.
repo = Git(uri=args.uri, gitpath=args.git_path)

# Find the number of commits and list them all
# NOTE(review): `count` is not used in the visible part of this example.
count = 0

# Date window for the commits to fetch
from_date = datetime(2018, 10, 12)
to_date = datetime(2019, 12, 9)
# NOTE(review): `branches` is assigned but not passed to fetch() in the visible code.
branches = 'master'

# NOTE(review): the fetch() call below is not closed in this excerpt; the
# remaining arguments (presumably at least `to_date`) are missing -- TODO restore.
item = list(repo.fetch(category='commit', from_date=from_date,

print("Number of commmits: %d." % len(item))
# Print the hash of every fetched commit, numbered from 1
j = 0
for i in item:
    j = j + 1
    print("Commit no " + str(j) + ": " + i['data']['commit'])
Ejemplo n.º 20
def _index_commit_batch(es_write, commits, index, project, first_id):
    """Eventize, enrich and bulk-index one batch of commits.

    :param es_write: Elasticsearch client used for the bulk upload
    :param commits: list of items yielded by ``Git.fetch()``
    :param index: name of the target index
    :param project: project label; also selects the project-specific owner fix
    :param first_id: ``_id`` assigned to the first document of this batch

    :returns: the next unused document id
    """
    # The argument 1 is passed to eventize() unchanged; its meaning is
    # defined by the events module (presumably a granularity level).
    events_df = events.Git(commits).eventize(1)

    # Add flags if found
    events_df = MessageLogFlag(events_df).enrich('message')
    events_df = SplitEmail(events_df).enrich("owner")

    # Code for webkit
    # If there's a bot committing code, then we need to use the values flag
    if project == 'webkit':
        # Fix values in the owner column
        events_df.loc[events_df["email"] == '*****@*****.**', "owner"] = events_df["values"]
        # Re-do this analysis to calculate the right email and user
        events_df = SplitEmail(events_df).enrich("owner")

    # Code for Blink
    # If there's a flag, then we need to update the owner
    if project == 'blink':
        events_df.loc[events_df["values"] != '', "owner"] = events_df["values"]
        events_df = SplitEmail(events_df).enrich("owner")

    events_df = SplitEmailDomain(events_df).enrich("email")

    # Add project information
    events_df["project"] = project

    # One document per dataframe row, with consecutive ids starting at first_id
    rows = events_df.to_dict("index")
    docs = [
        {
            "_index": index,
            "_type": "item",
            "_id": doc_id,
            "_source": source,
        }
        for doc_id, source in enumerate(rows.values(), start=first_id)
    ]
    helpers.bulk(es_write, docs)

    return first_id + len(docs)


def analyze_git(es_write, batch_size=15000):
    """Fetch the Blink commits and index them in Elasticsearch in batches.

    The target index is deleted and re-created first. Commits are then
    accumulated and, every ``batch_size`` commits, eventized, enriched and
    uploaded. Commits left over after the last full batch are uploaded too
    (previously they were dropped and the last full batch was sent twice).

    :param es_write: Elasticsearch client used to (re)create and fill the index
    :param batch_size: number of commits processed per bulk upload
    """
    # Alternative configurations used for other projects:
    #   INDEX = 'git_gecko'; PROJECT = 'gecko'
    #   git = Git("https://github.com/mozilla/gecko-dev.git", "../gecko_all_commits_final_version_no_cm_options_nobrowser_nochrome_notoolkit.log")
    #   INDEX = 'git_webkit'; PROJECT = 'webkit'
    #   git = Git("https://github.com/WebKit/webkit.git", "../webkit_final_log_no_mc_options.log")

    INDEX = "git_blink"
    PROJECT = "blink"
    git = Git("https://chromium.googlesource.com/chromium", "../blink_final_log_no_cm_options.log")

    # Start from a clean index; 400/404 are ignored so a missing index is not an error
    es_write.indices.delete(INDEX, ignore=[400, 404])
    es_write.indices.create(INDEX, body=MAPPING_GIT)

    commits = []
    uniq_id = 1

    for item in git.fetch():
        commits.append(item)

        if len(commits) >= batch_size:
            uniq_id = _index_commit_batch(es_write, commits, INDEX, PROJECT, uniq_id)
            commits = []

    # Flush whatever did not fill a complete batch
    if commits:
        _index_commit_batch(es_write, commits, INDEX, PROJECT, uniq_id)
# Train the gender classifier on the feature set prepared earlier in the script
classifier = nltk.NaiveBayesClassifier.train(train_set)

# Read command line arguments
parser = argparse.ArgumentParser(description="Count commits in a git repo")
parser.add_argument("repo", help="Repository url")
parser.add_argument("dir", help="Directory for cloning the repository")
parser.add_argument("--print", action='store_true', help="Print hashes")
args = parser.parse_args()

# Create a Git object, then count commits and classify authors in one pass
repo = Git(uri=args.repo, gitpath=args.dir)
countcommit = 0
countuser = 0
males = 0
females = 0

# NOTE(review): this excerpt was truncated; the body of the --print branch and
# the 'female' branch were reconstructed from the help text and the counters
# above -- confirm against the original script.
for commit in repo.fetch():
    if args.print:
        print(commit['data']['commit'])
    countcommit += 1

    # Classify each author once instead of once per comparison
    gender = classifier.classify(gender_features(commit['data']['Author']))
    if gender == 'male':
        males += 1
    elif gender == 'female':
        females += 1
Ejemplo n.º 22
git_repo_dir = '/tmp/perceval.git'
# Create a Git object pointing to git_repo_url, using git_repo_dir for cloning
repo = Git(uri=git_repo_url, gitpath=git_repo_dir)
print("Starting 1")
# NOTE(review): the next eight lines read like the body of a docstring or block
# comment whose delimiters were lost; they are not valid Python as they stand.
Uses the git object to print information about the repository,
this will then create the directory /tmp/perceval.git
other parameters you can use are:
	commit: aaa7a9209f096aaaadccaaa7089aaaa3f758a703
	Author:     John Smith <*****@*****.**>
	AuthorDate: Tue Aug 14 14:30:13 2012 -0300
	Commit:     John Smith <*****@*****.**>
	CommitDate: Tue Aug 14 14:30:13 2012 -0300

# NOTE(review): this loop has no body in this excerpt -- TODO restore it.
for commit in repo.fetch():
print("Starting 2")

# URL of the mailing list to analyze
mail_repo_url = 'https://mail-archives.apache.org/mod_mbox/httpd-dev/'
# Directory Perceval uses to store the mailing list archives
mail_repo_dir = '/tmp/perceval/'
repo = PipermailList(url=mail_repo_url, dirpath=mail_repo_dir)
# Start date for the fetch; it does not seem to affect which messages are printed
k = str_to_datetime("1996-04")
k = datetime_to_utc(k)
# NOTE(review): this loop has no body in this excerpt -- TODO restore it.
for message in repo.fetch(from_date=k):
Ejemplo n.º 23
    def test_fetch_branch(self):
        """Test whether commits are fetched from a Git repository for a given branch

        The same repository is fetched with five different `branches`
        values: ['master'], ['lzp'], ['master', 'lzp'], None (all commits)
        and [] (no commits). For every fetched item the commit hash,
        origin, uuid, category and tag are compared with the expected ones.
        """

        new_path = os.path.join(self.tmp_path, 'newgit')

        # NOTE(review): from_date is assigned but not used in this test.
        from_date = datetime.datetime(2014, 2, 11, 22, 7, 49)
        git = Git(self.git_path, new_path)
        # Let's fetch master
        commits = [commit for commit in git.fetch(branches=['master'])]

        # NOTE(review): every non-empty `expected` list in this test is cut off
        # after its first hash (no closing bracket) -- TODO restore the full lists.
        expected = ['bc57a9209f096a130dcc5ba7089a8663f758a703',

        self.assertEqual(len(commits), len(expected))

        # Items must come back in the same order as the expected hashes
        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        # Now let's fetch lzp
        commits = [commit for commit in git.fetch(branches=['lzp'])]

        expected = ['bc57a9209f096a130dcc5ba7089a8663f758a703',

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        # Now, let's fetch master and lzp
        commits = [commit for commit in git.fetch(branches=['master', 'lzp'])]

        expected = ['bc57a9209f096a130dcc5ba7089a8663f758a703',

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        # Now, let's fetch None, which means "all commits"
        commits = [commit for commit in git.fetch(branches=None)]

        expected = ['bc57a9209f096a130dcc5ba7089a8663f758a703',

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        # Now, let's fetch [], which means "no commits"
        commits = [commit for commit in git.fetch(branches=[])]

        expected = []

        # Only the count is checked here: nothing should be fetched
        self.assertEqual(len(commits), len(expected))

Ejemplo n.º 24
def getCommits(user_owner, repo_name):
    """Fetch the commits of the GitHub repository ``user_owner/repo_name``.

    Returns the result of ``Git.fetch()`` unchanged (presumably a lazy
    iterable of commit items -- confirm against the Perceval backend).

    NOTE(review): the ``Git(...)`` call below is cut off in this excerpt;
    the second argument (presumably the local git path) and the closing
    parenthesis are missing -- TODO restore.
    """
    repo = Git(f"https://github.com/{user_owner}/{repo_name}.git",
    commits = repo.fetch()
    return commits
Ejemplo n.º 25
# NOTE(review): `count` is not used in the visible part of this script.
count = 0
t_data = []
data = []
# with open(filepath2) as outfile:
# Load the time zone table; 'utf-8-sig' strips a leading BOM if present
time_data = json.load(codecs.open(filepath2, 'r', 'utf-8-sig'))
# json.loads(open(filepath2).read().decode('utf-8-sig'))
for p in time_data:
    st = p['WindowsTimeZones'][0]['Name']
    # print(st[4:10])
    # Keep characters 4-6 and 8-9 of the name, skipping character 7
    # (presumably turns "(UTC-03:00) ..." into "-0300" -- confirm the name format)
    p['TimeZone'] = st[4:7] + st[8:10]

# NOTE(review): `t_data` is never filled in the visible code, so this writes an
# empty list over filepath2 -- verify this is intended.
with open(filepath2, 'w') as outfile:
    json.dump(t_data, outfile)

for x in repo.fetch():
    # Last five characters of the author date (presumably the UTC offset, e.g. "-0300")
    t_val = x['data']['AuthorDate'][-5:]
    list_of_countries = []
    for p in time_data:
        # NOTE(review): this `if` has no body in this excerpt; the statement that
        # fills `list_of_countries` appears to be missing -- TODO restore it.
        if t_val == p['TimeZone']:
            # print(t_val,p['TimeZone'],'afdf')
    x['data']['CountryName'] = list_of_countries
    x['data']['TimeZone'] = t_val
# NOTE(review): `data` is never appended to in the visible code, so an empty
# list is written to filepath -- verify.
with open(filepath, 'w') as outfile:
    json.dump(data, outfile)
# for commit in repo.fetch():
#     if args.print:
#         print(commit['data']['commit'])
#! /usr/bin/env python3
# Count commits

import argparse

from perceval.backends.core.git import Git

# Read command line arguments
parser = argparse.ArgumentParser(description="Count commits in a git repo")
parser.add_argument("repo", help="Repository url")
parser.add_argument("dir", help="Directory for cloning the repository")
parser.add_argument("--print", action='store_true', help="Print hashes")
args = parser.parse_args()

# Create a Git object and count its commits, printing each hash when
# --print is given
repo = Git(uri=args.repo, gitpath=args.dir)
count = 0
for commit in repo.fetch():
    if args.print:
        # The body of this branch was missing; the hash lives in commit['data']['commit']
        print(commit['data']['commit'])
    count += 1
print("Number of commmits: %d." % count)
Ejemplo n.º 27
# Initializing the Git backend
git_backend = Git(uri=REPOSITORY_URL, gitpath=REPO_DIR)

# Range of dates in which commits are to be fetched
from_date = datetime(2018, 10, 1)
to_date = datetime(2019, 2, 5)

# Repo Branches from which commits to be fetched [ 2/3 ]
repo_branches = ["master", "develop"]

# Calling fetch method
# The method retrieves from a Git repository or a log file a list of
# commits. Commits are returned in the same order they were obtained.
range_commits = git_backend.fetch(branches=repo_branches,
range_commits_list = list(range_commits)
n_commits = len(range_commits_list)
print("NUMBER OF COMMITS: ", n_commits)

last_commit = range_commits_list[n_commits - 1]
pprint(range_commits_list[n_commits - 1].keys())

for commit in range_commits_list:
        "COMMIT DATE: {commit_date}\nAUTHOR: {author_name}\nCOMMIT MESSAGE: {commit_message}"