Example #1
def git_pull(remote_url, local, idc_dir):
    """
    @remote_url: git remote address
    @local: local git repo dir
    @idc_dir: directory passed to git log for the commit message query
    """
    commit_msg = None
    if not os.path.exists(local):
        os.mkdir(local)

    logging.info('git start pull from remote repo.')
    if not os.path.exists(
            "{0}/.git".format(local)) or not git.repo.fun.is_git_dir(
                "{0}/.git".format(local)):
        repo_clone = Repo.clone_from(remote_url, local)
        if not repo_clone:
            logging.debug("clone repo from {0} to {1}failed.\n".format(
                remote_url, local))
            return
    repo = Repo(local)
    remote = None
    git_commit = git.Git(idc_dir)

    last_commit_log = "/var/log/last_commit.log"
    last_commit = None
    if os.path.exists(last_commit_log):
        with open(last_commit_log) as fhand:
            last_log_commit = fhand.read().strip()
        # Warning: for performance, only the last 500 commits are checked;
        # the last processed commit could be persisted in MySQL instead.
        for commit in repo.iter_commits('master', max_count=500):
            if last_log_commit == commit.hexsha:
                last_commit = last_log_commit
                break
    if not last_commit:
        last_commit = repo.head.commit.hexsha

    with FileLock(os.path.split(local)[1]) as _flock:
        for remote_name in repo.remotes:
            if remote_name.url == remote_url:
                remote = remote_name
                break

        if remote is None:
            remote_name = "%x" % (random.randint(0, 1000000)
                                  ^ int(time.time()))
            remote = repo.create_remote(remote_name, remote_url)

        ret = 0
        try:
            info_list = remote.pull()
            for info in info_list:
                ret = info.flags & (git.FetchInfo.REJECTED
                                    | git.FetchInfo.ERROR)
            if ret > 0:
                logging.warning("[conf-pull] pull from remote error!\n")
            commit_msg = git_commit.log("{0}..HEAD".format(last_commit),
                                        "--pretty=%H,%ai,%s", idc_dir)
        except (git.GitCommandError, git.RepositoryDirtyError) as e:
            logging.warning("git pull error, error={0}\n".format(str(e)))
        except Exception:
            logging.warning("git pull error, other exception.\n")
Example #2
def getCommits(repo_name):
    shas = set()
    commits_list = []
    repo = Repo(os.path.join(work_dir, repo_name))
    for b in repo.remote().fetch():
        if '/' not in b.name:
            continue
        print("start to check branch {} of {}".format(b.name, repo_name))
        branch_name = b.name.split('/')[1]
        repo.git.checkout('-B', branch_name, b.name)
        commits = list(repo.iter_commits())
        for idx, commit in enumerate(commits):
            if str(commit) in shas:
                continue
            else:
                shas.add(str(commit))
            commit_collection = {}
            xml_cnt = 0
            kot_jav_cnt = 0

            file_list = list(commit.stats.files)
            for file in file_list:
                if len(file) > 4 and file[-4:] == '.xml':
                    xml_cnt += 1
                elif (len(file) > 3 and file[-3:] == '.kt') or (
                        len(file) > 5 and file[-5:] == '.java'):
                    kot_jav_cnt += 1
            if xml_cnt >= 1 and kot_jav_cnt >= 1:
                commit_collection["commit_id"] = str(commit)
                commit_collection["commit_msg"] = str.strip(commit.message)
                commit_collection["commit_time"] = str(
                    commit.committed_datetime)
                commit_collection["committer_name"] = commit.author.name
                commit_collection["committer_email"] = commit.author.email

                diff_files = []
                if not commit.parents:
                    continue
                else:
                    for diff in commit.parents[0].diff(commit):
                        diff_file = {}
                        diff_file["file_path"] = diff.a_path
                        diff_file["change_type"] = diff.change_type
                        diff_file["lang"] = os.path.splitext(
                            diff.a_path)[1][1:]
                        diff_files.append(diff_file)

                commit_collection["diff_files"] = diff_files
                commit_collection["parent_commit_num"] = len(commit.parents)
                commits_list.append(commit_collection)

    repo_dump = json.dumps(commits_list, indent=2, ensure_ascii=False)
    with open(os.path.join(stats_dir, repo_name + ".json"), "w") as commit_fd:
        commit_fd.write(repo_dump)
        print(repo_name + " done")
Example #3
class GitSync(object):
    def __init__(self, database):
        logging.info("Initializing GitSync.")

        self.database = database
        self.charts = dict()
        self.url = DEFAULT_GITREPO

        try:
            self.repo = Repo(REPO_DIRECTORY)
        except InvalidGitRepositoryError:
            logging.info("Cloning repository in %s", REPO_DIRECTORY)
            self.repo = Repo.clone_from(self.url, REPO_DIRECTORY)

    @coroutine
    def update_repo(self, document):
        if document['o']["charts"]["repo_url"] != self.url:
            logging.info("Repo url updated to '%s'.",
                         document['o']["charts"]["repo_url"])
            self.url = document['o']["charts"]["repo_url"]

    @coroutine
    def sync_loop(self):
        logging.info("Initializing sync loop.")
        yield add_callback("Settings", self.update_repo)

        synced_head = None

        settings = (yield Query(self.database, "Settings").find_one()) or dict()
        if settings.get("charts"):
            self.url = settings["charts"]["repo_url"]

        while True:
            try:
                if self.url != self.repo.remotes.origin.url:
                    logging.info("Changing reference from '%s' to '%s'.",
                                 self.repo.remotes.origin.url, self.url)
                    self.repo.delete_remote('origin')
                    self.repo.create_remote('origin', url=self.url)
                    synced_head = None

                self.repo.git.fetch('--all')
                self.repo.git.reset('--hard', 'origin/master')

                if synced_head != self.repo.head.ref.commit:
                    yield self.sync()
                    synced_head = self.repo.head.ref.commit

            except:
                logging.exception("Failed to pull repository.")

            yield sleep(5)

    @coroutine
    def sync(self):
        logging.info("Syncing '%s'.", REPO_DIRECTORY)

        charts = yield Query(self.database, "Charts", manipulate=True).find()
        for chart in charts:
            path = chart["path"]
            self.charts[path] = chart

        discovered_charts = dict()
        for subdir, _, files in os.walk(REPO_DIRECTORY):
            for chart_file in files:
                if chart_file == "Chart.yaml":
                    try:
                        discovered_charts[subdir] = yield self.import_chart(
                            subdir)
                    except Exception:
                        logging.exception("Failed to import chart at '%s'",
                                          subdir)

        for path, existing in self.charts.iteritems():
            discovered = discovered_charts.get(path, None)

            if discovered is None:
                logging.debug("Deleting chart %(name)s", existing)
                yield Query(self.database, 'Charts').remove(existing)
            else:
                discovered["_id"] = existing["_id"]
                discovered["metadata"] = existing["metadata"]

                if discovered["commit"] != existing["commit"]:
                    logging.debug("Updating existing chart %(name)s",
                                  discovered)
                    yield Query(self.database, "Charts",
                                manipulate=True).update(discovered)

        for path, discovered in discovered_charts.iteritems():
            if discovered and "_id" not in discovered:
                logging.debug("Inserting new chart %(name)s", discovered)
                try:
                    yield Query(self.database, "Charts",
                                manipulate=True).insert(discovered)
                except:
                    logging.error("Failed to insert chart %(name)s",
                                  discovered)

        self.charts = discovered_charts

    @coroutine
    def import_chart(self, directory):
        chart_path = os.path.join(directory, "Chart.yaml")

        with open(chart_path, "r") as stream:
            chart = load(stream)
            chart["path"] = directory

            commit = self.repo.iter_commits(paths=chart_path).next()
            chart["commit"] = binascii.hexlify(commit.binsha)
            chart["committed_date"] = commit.committed_date
            chart["resources"] = []

            manifests = yield self.import_manifests(directory)
            for _, manifest in manifests.iteritems():
                if commit.committed_date < manifest["commit"].committed_date:
                    chart["commit"] = binascii.hexlify(
                        manifest["commit"].binsha)
                    chart["committed_date"] = manifest["commit"].committed_date

                for resource in manifest["resources"]:
                    chart["resources"].append(resource)

            raise Return(chart)

    @coroutine
    def import_manifests(self, directory):
        manifests = dict()

        manifests_path = os.path.join(directory, "manifests", "*.yaml")
        for manifest in glob.glob(manifests_path):
            with open(manifest, "r") as stream:
                manifests[manifest] = dict(
                    resources=[resource for resource in load_all(stream)],
                    commit=self.repo.iter_commits(paths=manifest).next())

        manifests_path = os.path.join(directory, "templates", "*.yaml")
        for manifest in glob.glob(manifests_path):
            manifest_filename = ntpath.basename(manifest)
            rendered_manifest = check_output([
                "tide", "view", "-f", "templates/" + manifest_filename,
                directory
            ])
            with io.TextIOWrapper(io.BytesIO(rendered_manifest)) as stream:
                manifests[manifest] = dict(
                    resources=[resource for resource in load_all(stream)],
                    commit=self.repo.iter_commits(paths=manifest).next())

        raise Return(manifests)
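The GitPython idiom at the heart of import_chart and import_manifests is "latest commit that touched a path"; a standalone sketch of just that call, with a hypothetical repository path:

from git import Repo

repo = Repo("/tmp/charts-repo")  # hypothetical clone
# the first element of the commit iterator restricted to one path is the
# newest commit that touched that path
commit = next(repo.iter_commits(paths="stable/redis/Chart.yaml"))
print(commit.hexsha, commit.committed_date)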
Example #4
class DLLearnerRepo(Iterator):
    """
    TODO:
        - add progress bar for repo download
        - find out automatically whether the repo is already cloned and in case
          it is, just run a pull (after having set the branch)
    """
    _commits_get_url = 'https://api.github.com/repos/AKSW/DL-Learner/commits' +\
                       '?per_page=10000&since=%s&sha=%s'
    _github_repo_url = 'https://github.com/AKSW/DL-Learner.git'

    def __init__(self, repo_dir_path, since=None, branch='develop',
                 already_cloned=False):

        # strip off trailing directory separator
        if repo_dir_path.endswith(os.path.sep):
            self.repo_dir_path = repo_dir_path[:-1]
        else:
            self.repo_dir_path = repo_dir_path

        if since is None:
            self.since = datetime.now() - timedelta(default_time_delta_in_days)
        else:
            self.since = since

        self.branch = branch
        self.commit_sha1s = None
        self.next_idx = None

        self._setup_repo(already_cloned)

    def __len__(self):
        if self.commit_sha1s is None:
            self._init_commit_sha1s()

        return len(self.commit_sha1s)

    def _setup_repo(self, already_cloned):
        if already_cloned:
            self._git_repo = Repo(self.repo_dir_path)
        else:
            _log.info('Cloning repo from %s into %s' % (
                self._github_repo_url, self.repo_dir_path))
            self._git_repo = Repo.clone_from(
                self._github_repo_url, self.repo_dir_path)
            _log.info('-Done-')

        if self.branch:
            self._git_repo.git.checkout(self.branch)
            # self._git_repo.active_branch = self.branch

    def __iter__(self):
        return self

    def __next__(self):
        if self.commit_sha1s is None:
            self._init_commit_sha1s()

        self.next_idx += 1
        if self.next_idx >= len(self.commit_sha1s):
            raise StopIteration

        return DLLearnerCommit(self.commit_sha1s[self.next_idx], self)

    def _init_commit_sha1s(self):
        commit_sha1s = []

        for c in self._git_repo.iter_commits():
            """Iters the commits backwards in time, i.e. the latest commit
            comes first and the oldest comes last
            """
            c_date = datetime.fromtimestamp(c.committed_date)

            if c_date < self.since:
                break

            commit_sha1s.append(c.hexsha)

        commit_sha1s.reverse()

        self.commit_sha1s = \
            [c for c in commit_sha1s if c not in commits_to_skip]
        self.next_idx = -1

    def get_checkout_cmd(self):
        return self._git_repo.git.checkout
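A minimal sketch of driving DLLearnerRepo; the clone directory is hypothetical, and the module-level commits_to_skip, default_time_delta_in_days and DLLearnerCommit names the class refers to are assumed to exist.

from datetime import datetime

repo_iter = DLLearnerRepo("/tmp/DL-Learner", since=datetime(2019, 1, 1))
print(len(repo_iter), "commits since 2019-01-01 on the develop branch")
for dl_commit in repo_iter:  # DLLearnerCommit objects, oldest first
    pass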
Example #5
def getEventsOf(owner, repo_name):
    fix_commits = []
    # res = requests.get("https://api.github.com/repos/{}/{}/events".format(owner, repo_name))
    referencePairs = {}
    after = ""
    id = 0
    evt_cnt = 0
    while True:
        request = requests.post(
            'https://api.github.com/graphql',
            json={'query': getQuery(owner, repo_name, after)},
            headers={"Authorization": "Bearer %s" % token})
        result = request.json()
        # result = json.dumps(result['data'], indent=2)
        # with open("repo_name_" + str(id), "w") as fd:
        #     fd.write(result)
        #     break

        if 'errors' in result:
            print(result)
            return 1

        search = result['data']['search']
        edges = search['edges']

        for event in edges:
            if len(event['node']) == 0:
                continue
            commits = event['node']['timelineItems']['nodes']
            if commits != []:
                for commit in commits:
                    commit_ = commit['commit']
                    if commit_ is None:
                        continue
                    if 'fix' in commit_['message']:
                        fix_commits.append(commit_['oid'])
                    referencePairs[event['node']['number']] = {
                        "creationdate":
                        event['node']['createdAt'].replace('T', " ").replace(
                            'Z', " +0000"),
                        "resolutiondate":
                        event['node']['closedAt'].replace('T', " ").replace(
                            'Z', " +0000"),
                        "hash":
                        commit_['oid'],
                        "commitdate":
                        commit_['committedDate'].replace('T', " ").replace(
                            'Z', " +0000"),
                    }
                    evt_cnt += 1

        # print("Issue count:", search['issueCount'], "number of edges:",
        #       len(search['edges']))

        # print("PageInfo:", search['pageInfo'], "cursor:",
        #       search['edges'][-1]['cursor'], "\n")

        if not search['pageInfo']['hasNextPage']:
            print("{}/{} done with {} commits, issue count {}".format(
                owner, repo_name, evt_cnt, search['issueCount']))
            break

        after = 'after: "%s"' % search['edges'][-1]['cursor']
        time.sleep(1)
        id += 1

    repo = Repo(os.path.join(work_dir, owner + '-' + repo_name))
    commits = list(repo.iter_commits())
    for idx, commit in enumerate(commits):
        if 'fix' in commit.message and str(commit) not in fix_commits:
            res = re.search(r'[+-]\d{2}:', str(commit.committed_datetime))
            referencePairs[str(commit)] = {
                "creationdate":
                str(commit.committed_datetime)[:res.start()] + " +0000",
                "resolutiondate":
                str(commit.committed_datetime)[:res.start()] + " +0000",
                "hash":
                str(commit),
                "commitdate":
                str(commit.committed_datetime)[:res.start()] + " +0000",
            }
            evt_cnt += 1
    print("{}/{} find {} fix commits in total".format(owner, repo_name,
                                                      evt_cnt))
    if evt_cnt != 0:
        with open(os.path.join(res_dir, repo_name + "_issue.json"), "w") as fd:
            fd.write(json.dumps(referencePairs, indent=2))
            with open("evet_list", "a") as list_fd:
                list_fd.write("{} {}\n".format(owner, repo_name))

    return evt_cnt
Example #6
def check(repo_obj):
    global row
    global commit_cnt
    global changed_files_cnt
    global all_commit_cnt
    global all_changed_cnt

    repo_name = repo_obj["owner"]["login"] + "-" + repo_obj["name"]
    # print("="*8, "start to check repo ", repo_name, "="*8)
    commit_total = 0
    commit_cross = 0
    percent = 0

    shas = set()
    repo = Repo(os.path.join(work_dir, repo_name))
    for b in repo.remote().fetch():
        if '/' not in b.name:
            continue
        print("start to check branch {} of {}".format(b.name, repo_name))
        branch_name = b.name.split('/')[1]
        repo.git.checkout('-B', branch_name, b.name)
        commits = list(repo.iter_commits())
        for idx, commit in enumerate(commits):
            if str(commit) in shas:
                continue
            else:
                shas.add(str(commit))
            if len(commit.parents) > 1:
                continue
            commit_total += 1
            xml_cnt = 0
            kot_jav_cnt = 0
            all_commit_cnt += 1

            # if idx == len(commits)-1:
            file_list = list(commit.stats.files)
            all_changed_cnt += len(file_list)
            for file in file_list:
                if len(file) > 4 and file[-4:] == '.xml':
                    xml_cnt += 1
                elif (len(file) > 3 and file[-3:] == '.kt') or (
                        len(file) > 5 and file[-5:] == '.java'):
                    kot_jav_cnt += 1
            if xml_cnt >= 1 and kot_jav_cnt >= 1:
                commit_cnt += 1
                changed_files_cnt += len(file_list)
                commit_cross += 1
        #     break
        # diff_index = commit.diff(commits[idx+1])

        # for diff_item in diff_index:
        #     if diff_item.a_path[-4:] == '.xml':
        #         xml_cnt += 1
        #     elif diff_item.a_path[-3:] == '.kt' or diff_item.a_path[-5:] == '.java':
        #         kot_jav_cnt += 1
        # if xml_cnt >= 1 and kot_jav_cnt >= 1:
        #     commit_cross += 1

    percent = float(commit_cross) / commit_total
    repo_name_full = repo_obj["full_name"]

    res = "{} Total: {}, Cross: {}, Percent: {}".format(
        repo_name_full, commit_total, commit_cross, percent)
    print(res)
    percent_coll.append((repo_obj["full_name"], percent * 100))
    # print("="*8, "check repo ", repo_name, "completed", "="*8)
    return commit_total, commit_cross, percent
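check reads several module-level counters plus a percent_coll list, and repo_obj follows the GitHub API repository shape; a minimal setup sketch with hypothetical values:

work_dir = "/data/android-repos"   # hypothetical clone directory
row = 0
commit_cnt = changed_files_cnt = 0
all_commit_cnt = all_changed_cnt = 0
percent_coll = []

repo_obj = {"owner": {"login": "octocat"},
            "name": "Hello-World",
            "full_name": "octocat/Hello-World"}
total, cross, percent = check(repo_obj)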
Example #7
class GitSync(object):

    def __init__(self, database):
        logging.info("Initializing GitSync.")

        self.database = database
        self.charts = dict()
        self.url = DEFAULT_GITREPO

        try:
            self.repo = Repo(REPO_DIRECTORY)
        except InvalidGitRepositoryError:
            logging.info("Cloning repository in %s", REPO_DIRECTORY)
            self.repo = Repo.clone_from(self.url, REPO_DIRECTORY)

    @coroutine
    def update_repo(self, document):
        if document['o']["charts"]["repo_url"] != self.url:
            logging.info("Repo url updated to '%s'.", document['o']["charts"]["repo_url"])
            self.url = document['o']["charts"]["repo_url"]

    @coroutine
    def sync_loop(self):
        logging.info("Initializing sync loop.")
        yield add_callback("Settings", self.update_repo)

        synced_head = None

        settings = (yield Query(self.database, "Settings").find_one()) or dict()
        if settings.get("charts"):
            self.url = settings["charts"]["repo_url"]

        while True:
            try:
                if self.url != self.repo.remotes.origin.url:
                    logging.info("Changing reference from '%s' to '%s'.", self.repo.remotes.origin.url, self.url)
                    self.repo.delete_remote('origin')
                    self.repo.create_remote('origin', url=self.url)
                    synced_head = None

                self.repo.git.fetch('--all')
                self.repo.git.reset('--hard', 'origin/master')

                if synced_head != self.repo.head.ref.commit:
                    yield self.sync()
                    synced_head = self.repo.head.ref.commit

            except:
                logging.exception("Failed to pull repository.")

            yield sleep(5)

    @coroutine
    def sync(self):
        logging.info("Syncing '%s'.", REPO_DIRECTORY)

        charts = yield Query(self.database, "Charts", manipulate=True).find()
        for chart in charts:
            path = chart["path"]
            self.charts[path] = chart

        discovered_charts = dict()
        for subdir, _, files in os.walk(REPO_DIRECTORY):
            for chart_file in files:
                if chart_file == "Chart.yaml":
                    try:
                        discovered_charts[subdir] = yield self.import_chart(subdir)
                    except Exception:
                        logging.exception("Failed to import chart at '%s'", subdir)

        for path, existing in self.charts.iteritems():
            discovered = discovered_charts.get(path, None)

            if discovered is None:
                logging.debug("Deleting chart %(name)s", existing)
                yield Query(self.database, 'Charts').remove(existing)
            else:
                discovered["_id"] = existing["_id"]
                discovered["metadata"] = existing["metadata"]

                if discovered["commit"] != existing["commit"]:
                    logging.debug("Updating existing chart %(name)s", discovered)
                    yield Query(self.database, "Charts", manipulate=True).update(discovered)

        for path, discovered in discovered_charts.iteritems():
            if discovered and "_id" not in discovered:
                logging.debug("Inserting new chart %(name)s", discovered)
                try:
                    yield Query(self.database, "Charts", manipulate=True).insert(discovered)
                except:
                    logging.error("Failed to insert chart %(name)s", discovered)

        self.charts = discovered_charts

    @coroutine
    def import_chart(self, directory):
        chart_path = os.path.join(directory, "Chart.yaml")

        with open(chart_path, "r") as stream:
            chart = load(stream)
            chart["path"] = directory

            commit = self.repo.iter_commits(paths=chart_path).next()
            chart["commit"] = binascii.hexlify(commit.binsha)
            chart["committed_date"] = commit.committed_date
            chart["resources"] = []

            manifests = yield self.import_manifests(directory)
            for _, manifest in manifests.iteritems():
                if commit.committed_date < manifest["commit"].committed_date:
                    chart["commit"] = binascii.hexlify(manifest["commit"].binsha)
                    chart["committed_date"] = manifest["commit"].committed_date

                for resource in manifest["resources"]:
                    chart["resources"].append(resource)

            raise Return(chart)

    @coroutine
    def import_manifests(self, directory):
        manifests = dict()

        manifests_path = os.path.join(directory, "manifests", "*.yaml")
        for manifest in glob.glob(manifests_path):
            with open(manifest, "r") as stream:
                manifests[manifest] = dict(
                    resources=[resource for resource in load_all(stream)],
                    commit=self.repo.iter_commits(paths=manifest).next()
                )

        manifests_path = os.path.join(directory, "templates", "*.yaml")
        for manifest in glob.glob(manifests_path):
            manifest_filename = ntpath.basename(manifest)
            rendered_manifest = check_output(["tide", "view", "-f", "templates/" + manifest_filename, directory])
            with io.TextIOWrapper(io.BytesIO(rendered_manifest)) as stream:
                manifests[manifest] = dict(
                    resources=[resource for resource in load_all(stream)],
                    commit=self.repo.iter_commits(paths=manifest).next()
                )

        raise Return(manifests)
Example #8
def check(repo_name):
    # global commit_cnt
    # global changed_files_cnt
    # global all_commit_cnt
    # global all_changed_cnt
    global multi_lang
    global other

    shas = set()
    repo = Repo(os.path.join(work_dir, repo_name))
    for b in repo.remote().fetch():
        if '/' not in b.name:
            continue
        print("start to check branch {} of {}".format(b.name, repo_name))
        # branch_name = b.name.split('/')[1]
        # repo.git.checkout('-B', branch_name, b.name)
        commits = list(repo.iter_commits('remotes/' + b.name))
        for idx, commit in enumerate(commits):
            if str(commit) in shas:
                continue
            else:
                shas.add(str(commit))
            if len(commits[idx - 1].parents) > 1:
                continue
            if idx == 0:
                continue
            xml_cnt = 0
            kot_jav_cnt = 0

            # if idx == len(commits)-1:
            file_list = list(commits[idx - 1].stats.files)
            dir_set = set()
            for file in file_list:
                file = file.split(' => ')[-1]
                dir = file.split('/')[0]
                if dir == file:
                    dir_set.add('.')
                else:
                    dir_set.add(dir)
                if len(file) > 4 and file[-4:] == '.xml':
                    xml_cnt += 1
                elif (len(file) > 3 and file[-3:] == '.kt') or (
                        len(file) > 5 and file[-5:] == '.java'):
                    kot_jav_cnt += 1

            dir_cnt = len(dir_set)

            patch = repo.git.diff(commit.tree,
                                  commits[idx - 1].tree).split('\n')
            hunks_cnt = 0
            added = 0
            deleted = 0
            for line in patch:
                if len(line) >= 2 and line[0] == '@' and line[1] == '@':
                    hunks_cnt += 1
                elif len(line) >= 1 and line[0] == '+' and (len(line) < 3
                                                            or line[1] != '+'
                                                            or line[2] != '+'):
                    added += 1
                elif len(line) >= 1 and line[0] == '-' and (len(line) < 3
                                                            or line[1] != '-'
                                                            or line[2] != '-'):
                    deleted += 1

            commit_type = ""
            if xml_cnt >= 1 and kot_jav_cnt >= 1:
                commit_type = "Multi-lang"
                multi_lang += 1
            else:
                commit_type = "Other"
                other += 1

            with open(os.path.join(csv_dir, metric_type[0] + ".csv"),
                      'a',
                      newline="") as csv_fd:
                csv_writer = csv.writer(csv_fd,
                                        delimiter=',',
                                        quotechar='"',
                                        quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow([commit_type, len(file_list)])
            if len(file_list) == 0:
                print("!!" + str(commits[idx - 1]))
            with open(os.path.join(csv_dir, metric_type[1] + ".csv"),
                      'a',
                      newline="") as csv_fd:
                csv_writer = csv.writer(csv_fd,
                                        delimiter=',',
                                        quotechar='"',
                                        quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow([commit_type, hunks_cnt])
            with open(os.path.join(csv_dir, metric_type[2] + ".csv"),
                      'a',
                      newline="") as csv_fd:
                csv_writer = csv.writer(csv_fd,
                                        delimiter=',',
                                        quotechar='"',
                                        quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow([commit_type, added])
            with open(os.path.join(csv_dir, metric_type[3] + ".csv"),
                      'a',
                      newline="") as csv_fd:
                csv_writer = csv.writer(csv_fd,
                                        delimiter=',',
                                        quotechar='"',
                                        quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow([commit_type, deleted])
            with open(os.path.join(csv_dir, metric_type[4] + ".csv"),
                      'a',
                      newline="") as csv_fd:
                csv_writer = csv.writer(csv_fd,
                                        delimiter=',',
                                        quotechar='"',
                                        quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow([commit_type, dir_cnt])
        #     break
        # diff_index = commit.diff(commits[idx+1])

        # for diff_item in diff_index:
        #     if diff_item.a_path[-4:] == '.xml':
        #         xml_cnt += 1
        #     elif diff_item.a_path[-3:] == '.kt' or diff_item.a_path[-5:] == '.java':
        #         kot_jav_cnt += 1
        # if xml_cnt >= 1 and kot_jav_cnt >= 1:
        #     commit_cross += 1
    print("repo {} done".format(repo_name))
Example #9
def getQRCommits(repo_name):
    global QRCommits_cnt
    repo_cnt = 0

    commits_list = []
    shas = set()
    repo = Repo(os.path.join(work_dir, repo_name))

    for b in repo.remote().fetch():
        if '/' not in b.name:
            continue
        print("start to check branch {} of {}".format(b.name, repo_name))
        branch_name = b.name.split('/')[1]
        repo.git.checkout('-B', branch_name, b.name)
        commits = list(repo.iter_commits(branch_name))

        for idx, commit in enumerate(commits):
            if str(commit) in shas:
                continue
            else:
                shas.add(str(commit))
            commit_collection = {}
            xml_cnt = 0
            kot_jav_cnt = 0

            file_list = list(commit.stats.files)
            for file in file_list:
                if len(file) > 4 and file[-4:] == '.xml':
                    xml_cnt += 1
                elif (len(file) > 3 and file[-3:] == '.kt') or (
                        len(file) > 5 and file[-5:] == '.java'):
                    kot_jav_cnt += 1
            if xml_cnt >= 1 and kot_jav_cnt >= 1:
                if idx != 0:
                    valid = False
                    for f in list(commits[idx - 1].stats.files):
                        if '.xml' in f or '.kt' in f or '.java' in f:
                            valid = True
                            break
                    if not valid:
                        continue
                    time_next_commit = commits[
                        idx - 1].committed_datetime - commit.committed_datetime
                    if time_next_commit.total_seconds() / 60 <= 30 and len(
                            commit.parents) < 2 and len(
                                commits[idx - 1].parents) < 2:
                        QRCommits_cnt += 1
                        repo_cnt += 1
                        commit_collection["commit_id"] = str(commit)
                        commit_collection["commit_time"] = str(
                            commit.committed_datetime)
                        commit_collection["commit_msg"] = str.strip(
                            commit.message)
                        commit_collection["remedy_id"] = str(commits[idx - 1])
                        commit_collection["remedy_time"] = str(
                            commits[idx - 1].committed_datetime)
                        commit_collection["remedy_msg"] = str.strip(
                            commits[idx - 1].message)
                        commits_list.append(commit_collection)

            # diff_files = []
            # if not commit.parents:
            #     continue
            # else:
            #     for diff in commit.diff(commit.parents[0]):
            #         diff_file = {}
            #         diff_file["file_path"] = diff.a_path
            #         diff_file["change_type"] = diff.change_type
            #         diff_file["lang"] = os.path.splitext(diff.a_path)[1][1:]
            #         diff_files.append(diff_file)

            # commit_collection["diff_files"] = diff_files
            # commit_collection["parent_commit_num"] = len(commit.parents)

    repo_dump = json.dumps(commits_list, indent=2, ensure_ascii=False)
    with open(os.path.join(stats_dir, repo_name + "_" + str(repo_cnt)),
              "w") as commit_fd:
        commit_fd.write(repo_dump)
        print(repo_name + " done with {} commits".format(repo_cnt))
Example #10
def computeRepo(repo_name):
    global issue_cnt
    global chinese_mst_cnt
    shas = set()
    print(8 * '=', "start to handle repo ", repo_name, 8 * '=')
    repo = Repo(os.path.join(repo_dir, repo_name))
    for b in repo.remote().fetch():
        if '/' not in b.name:
            continue
        print("start to check branch {} of {}".format(b.name, repo_name))
        branch_name = b.name.split('/')[1]
        repo.git.checkout('-B', branch_name, b.name)
        commits = list(repo.iter_commits(branch_name))

        for idx, commit in enumerate(commits):
            if str(commit) in shas:
                continue
            else:
                shas.add(str(commit))
            xml_cnt = 0
            kot_jav_cnt = 0

            for file in list(commit.stats.files):
                if len(file) > 4 and file[-4:] == '.xml':
                    xml_cnt += 1
                elif (len(file) > 3 and file[-3:] == '.kt') or (
                        len(file) > 5 and file[-5:] == '.java'):
                    kot_jav_cnt += 1
            if xml_cnt >= 1 and kot_jav_cnt >= 1:
                msg = commit.message
                # msg = str.strip(
                #     translator.translate(msg,
                #                          lang_tgt='en')).lower()
                # print(msg)
                if reg_issue.search(msg) and len(commit.parents) < 2:
                    msg = "fix " + msg
                    issue_cnt += 1
                if (check_contain_chinese(msg)):
                    chinese_mst_cnt += 1
                    seg_list = jieba.cut(msg, cut_all=False)
                    cn = True
                else:
                    seg_list = wordninja.split(msg.lower())
                    cn = False
                for seg in seg_list:
                    if cn is True and check_contain_chinese(seg) is False:
                        seg_split_again = wordninja.split(seg)
                        for _seg in seg_split_again:
                            if len(
                                    _seg
                            ) > 1 and _seg != '\t' and _seg != '\n' and _seg != '\r\n' and _seg not in stop_words:
                                if _seg in word_bags:
                                    word_bags[_seg] += 1
                                else:
                                    word_bags[_seg] = 1
                    elif cn is True:
                        if len(
                                seg
                        ) > 1 and seg != '\t' and seg != '\n' and seg != '\r\n' and seg not in stop_words:
                            if seg in word_bags:
                                word_bags[seg] += 1
                            else:
                                word_bags[seg] = 1
                    else:
                        if len(
                                seg
                        ) > 1 and seg != '\t' and seg != '\n' and seg != '\r\n' and seg not in stop_words:
                            if seg in word_bags:
                                word_bags[seg] += 1
                            else:
                                word_bags[seg] = 1
    print(8 * '=', "repo ", repo_name, "done", 8 * '=')
Example #11
class GitArchiver(BaseArchiver):
    """Gitpython implementation of the base archiver."""

    name = "git"

    def __init__(self, config):
        """
        Instantiate a new Git Archiver.

        :param config: The wily configuration
        :type  config: :class:`wily.config.WilyConfig`
        """
        try:
            self.repo = Repo(config.path)
        except git.exc.InvalidGitRepositoryError as e:
            raise InvalidGitRepositoryError from e

        self.config = config
        if self.repo.head.is_detached:
            self.current_branch = self.repo.head.object.hexsha
        else:
            self.current_branch = self.repo.active_branch
        assert not self.repo.bare, "Not a Git repository"

    def revisions(self, path: str, max_revisions: int) -> List[Revision]:
        """
        Get the list of revisions.

        :param path: the path to target.
        :type  path: ``str``

        :param max_revisions: the maximum number of revisions.
        :type  max_revisions: ``int``

        :return: A list of revisions.
        :rtype: ``list`` of :class:`Revision`
        """
        if self.repo.is_dirty():
            raise DirtyGitRepositoryError(self.repo.untracked_files)

        revisions = []
        for commit in self.repo.iter_commits(self.current_branch,
                                             max_count=max_revisions,
                                             reverse=True):
            tracked_files, tracked_dirs = get_tracked_files_dirs(
                self.repo, commit)
            if not commit.parents or not revisions:
                added_files = tracked_files
                modified_files = []
                deleted_files = []
            else:
                added_files, modified_files, deleted_files = whatchanged(
                    commit, self.repo.commit(commit.hexsha + "~1"))

            logger.debug(
                f"For revision {commit.name_rev.split(' ')[0]} found:")
            logger.debug(f"Tracked files: {tracked_files}")
            logger.debug(f"Tracked directories: {tracked_dirs}")
            logger.debug(f"Added files: {added_files}")
            logger.debug(f"Modified files: {modified_files}")
            logger.debug(f"Deleted files: {deleted_files}")

            rev = Revision(
                key=commit.name_rev.split(" ")[0],
                author_name=commit.author.name,
                author_email=commit.author.email,
                date=commit.committed_date,
                message=commit.message,
                tracked_files=tracked_files,
                tracked_dirs=tracked_dirs,
                added_files=added_files,
                modified_files=modified_files,
                deleted_files=deleted_files,
            )
            revisions.append(rev)
        return revisions[::-1]

    def checkout(self, revision: Revision, options: Dict):
        """
        Checkout a specific revision.

        :param revision: The revision identifier.
        :type  revision: :class:`Revision`

        :param options: Any additional options.
        :type  options: ``dict``
        """
        rev = revision.key
        self.repo.git.checkout(rev)

    def finish(self):
        """
        Clean up any state if processing completed/failed.

        For git, will checkout HEAD on the original branch when finishing
        """
        self.repo.git.checkout(self.current_branch)
        self.repo.close()

    def find(self, search: str) -> Revision:
        """
        Search a string and return a single revision.

        :param search: The search term.
        :type  search: ``str``

        :return: An instance of revision.
        :rtype: Instance of :class:`Revision`
        """
        commit = self.repo.commit(search)
        tracked_files, tracked_dirs = get_tracked_files_dirs(self.repo, commit)
        if not commit.parents:
            added_files = tracked_files
            modified_files = []
            deleted_files = []
        else:
            added_files, modified_files, deleted_files = whatchanged(
                commit, self.repo.commit(commit.hexsha + "~1"))

        return Revision(
            key=commit.name_rev.split(" ")[0],
            author_name=commit.author.name,
            author_email=commit.author.email,
            date=commit.committed_date,
            message=commit.message,
            tracked_files=tracked_files,
            tracked_dirs=tracked_dirs,
            added_files=added_files,
            modified_files=modified_files,
            deleted_files=deleted_files,
        )
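The revision walk in revisions() is built on iter_commits with reverse=True and max_count; a standalone sketch of that idiom with a hypothetical working copy:

from git import Repo

repo = Repo("/tmp/project")  # hypothetical working copy
for commit in repo.iter_commits("master", max_count=10, reverse=True):
    # oldest of the last 10 commits on master comes first
    print(commit.name_rev.split(" ")[0], commit.author.name)
repo.close()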
Example #12
class RepoExporter:
    def __init__(self, source_repo_path, source_repo_branch, target_repo_path, target_repo_relative_directory, target_repo_commit_branch):
        self.source_repo_path = source_repo_path
        self.source_repo_branch = source_repo_branch
        self.target_repo_path = target_repo_path
        self.target_repo_commit_branch = target_repo_commit_branch
        
        # Instantiate repo objects
        self.source_repo = Repo(self.source_repo_path)
        self.target_repo = Repo(self.target_repo_path)

        # saving the head of the source repo
        self.source_head = self.source_repo.active_branch.name

        # extract repo names
        self.source_repo_name = get_repo_name(self.source_repo)
        self.target_repo_name = get_repo_name(self.target_repo)

        # export paths
        self.target_repo_archive_path_root = "{}/{}".format(COMMIT_EXPORT_PATH, self.source_repo_name)
        self.target_repo_archive_path_tar = "{}/{}".format(self.target_repo_archive_path_root, "commit_archive.tar")
        self.target_repo_directory = "{}/{}".format(self.target_repo_path, target_repo_relative_directory)

        # ignores to add at each commit
        self.ignores_to_add = extract_gitinore(self.source_repo_path)
    
        # initialize commit infos
        self._exact_commits()

    def _clean_tmp(self):
        rmtree(COMMIT_EXPORT_PATH, ignore_errors=True)

    def _prepare_export(self):
        # clean the previous export data, recreate the folders
        rmtree(COMMIT_EXPORT_PATH, ignore_errors=True)
        rmtree(self.target_repo_directory, ignore_errors=True)

        makedirs(self.target_repo_archive_path_root, exist_ok=True)
        #makedirs(self.target_repo_directory, exist_ok=True)


    def _exact_commits(self):
        # going on the source branch
        self.source_repo.git.checkout(self.source_repo_branch)

        commits_infos = []
        for commit in self.source_repo.iter_commits():
            commit_id = commit.hexsha
            commit_infos = {
               # "author": commit.author,
                "message": commit.message,
                "date": datetime.fromtimestamp(commit.authored_date).isoformat()
            }
            print("found_commit --> ", commit_infos)
            
            commits_infos.append((commit_id, commit_infos))

        self.commits_infos = list(reversed(commits_infos))
        
        # Return to the default HEAD
        self.source_repo.git.checkout(self.source_head)

    def _add_ignores(self):
        gitignore_path = join(self.source_repo_path, ".gitignore")
        if self.ignores_to_add is not None:
            with open(gitignore_path, "a") as gitignore_file:
                gitignore_file.write(self.ignores_to_add)

    def extract_source_files(self, commit_id: str):
        # clean the export tmp directory
        self._prepare_export()
        self.source_repo.git.checkout(commit_id, force=True)

        # get all the files of the commit
        with open(self.target_repo_archive_path_tar, "wb+") as export_archive_io:
            self.source_repo.archive(export_archive_io)
        
        # extract all the source files of the commit and remove the archive tar
        archive_tar = tarfile.open(self.target_repo_archive_path_tar)
        archive_tar.extractall(self.target_repo_archive_path_root)
        unlink(self.target_repo_archive_path_tar)
        
    def move_source_to_target(self):
        # copy the commit source files to the target repo
        copytree(self.target_repo_archive_path_root, self.target_repo_directory)
        
        # Add problematic files to gitignore

    def commit_to_target(self, commit_infos: dict):
        # create the branch if it doesn't exist yet, otherwise just check it out
        try:
            self.target_repo.git.checkout(b=self.target_repo_commit_branch)
        
        except Exception:
            self.target_repo.git.checkout(self.target_repo_commit_branch)

        self.target_repo.git.stage(".")
        self.target_repo.git.commit(**commit_infos)


    def transfer_commits(self):
        for commit_id, commit_infos in self.commits_infos:
            print("transfering commit --> ", commit_infos["message"])
            
            # faking commit date ;)
            environ["GIT_AUTHOR_DATE"] = commit_infos["date"]
            environ["GIT_COMMITTER_DATE"] = commit_infos["date"]

            self.extract_source_files(commit_id)
            self.move_source_to_target()
            self.commit_to_target(commit_infos)
    


    def __del__(self):
        self._clean_tmp()
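A minimal sketch of driving RepoExporter end to end; every path and branch name below is hypothetical, and COMMIT_EXPORT_PATH plus the get_repo_name/extract_gitinore helpers are assumed to be defined at module level as the class expects.

exporter = RepoExporter(
    source_repo_path="/data/source-repo",
    source_repo_branch="main",
    target_repo_path="/data/target-repo",
    target_repo_relative_directory="imported/source-repo",
    target_repo_commit_branch="import-history",
)
exporter.transfer_commits()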
Example #13
if sys.argv[1] == "transform":
    args = parse_transform()
    with open(".replay.json", "r") as f:
        name = json.load(f)["NAME"]
    dst = Path.home() / ".flor" / name / "repo.git"
    if dst.exists():
        shutil.rmtree(dst)
    transformed = Path.home() / ".flor" / name / "transformed"
    if not transformed.exists():
        transformed.mkdir()
    r = Repo()
    assert "flor.shadow" in str(r.active_branch)
    r.clone(dst)
    r = Repo(dst)
    commits = [
        c for c in r.iter_commits()
        if "flor.shadow" in c.message and ".json" == c.message[-len(".json"):]
    ]
    root = args.source.absolute()
    cwd = os.getcwd()
    os.chdir(dst)
    active = r.active_branch
    for version in commits:
        r.git.checkout(version)
        n = transformed / (str(version.hexsha) + "." + str(args.source))
        try:
            backprop(None, root, args.source, open(n, "w"))  # type: ignore
            print(
                f'transformed {(str(version.hexsha) + "::" + str(args.source))}'
            )
        except FileNotFoundError: