Example #1
0
    def check_commit_dependency(self, commit_dependency_data):
        '''
        Compares the rows of the commit_dependency table with the
        expectation stored in self.commit_dependency by the unit test.

        Nothing is checked if self.commit_dependency is None. Otherwise,
        every expected entry has to occur among the actual rows (the impl
        is ignored for entries whose expected impl is None) and the number
        of actual rows has to equal the number of expected entries.

        :param commit_dependency_data:
        The data of the actual table:
        | id  | commitId | file | entityId | entityType | size | impl |
        :return:
        '''
        expected_entries = self.commit_dependency
        if expected_entries is None:
            return

        conf = Configuration.load(self.codeface_conf, self.project_conf)
        dbm = DBManager(conf)
        project_id = dbm.getProjectID(conf["project"], self.tagging)

        # strip the leading "id" column, which leaves
        # (commit_id, file, entityId, type, size, impl) tuples
        actual_rows = []
        actual_rows_no_impl = []
        for row in commit_dependency_data:
            stripped = (row[1], row[2], row[3], row[4], row[5], row[6])
            actual_rows.append(stripped)
            actual_rows_no_impl.append(stripped[0:5])

        # the expectation names commits by hash, the table by commit id
        expected_rows = []
        for entry in expected_entries:
            commit_id = dbm.getCommitId(project_id, entry[0])
            expected_rows.append(
                (commit_id, entry[1], entry[2], entry[3], entry[4], entry[5]))

        for expected_row in expected_rows:
            if expected_row[5] is not None:
                self.assertIn(expected_row, actual_rows)
            else:
                # no impl expected: compare all columns but the impl
                self.assertIn(expected_row[0:5], actual_rows_no_impl)

        self.assertEqual(len(actual_rows), len(expected_rows))
Example #2
0
 def getResults(self):
     """
     Read the complete contents of all result tables.

     Asserts that the project id reported by the database is not
     negative and then runs "SELECT *" on every table named in
     self.result_tables.

     :return: dict mapping each table name to what dbm.doFetchAll()
         returned after selecting from that table
     """
     conf = Configuration.load(self.codeface_conf, self.project_conf)
     dbm = DBManager(conf)
     project_id = dbm.getProjectID(conf["project"], self.tagging)
     self.assertGreaterEqual(project_id, 0)

     def fetch_all_rows(table_name):
         dbm.doExec("SELECT * FROM {table}".format(table=table_name))
         return dbm.doFetchAll()

     return {table_name: fetch_all_rows(table_name)
             for table_name in self.result_tables}
 def setup_with_p(self, p):
     """
     Configure this test case for an example project and empty all
     tables listed in pid_tables and other_tables.

     All paths are derived from the project's ``directory`` attribute;
     the project configuration is taken from its ``codeface_conf``
     attribute.

     :param p: the example project to set up for; if None, self.p is
         used instead
     """
     # Honour the argument instead of silently ignoring it. The fallback
     # keeps callers working that rely on self.p being set beforehand.
     project = p if p is not None else self.p
     path = project.directory
     git_path = pathjoin(path, ".git")
     self.gitdir = dirname(path)
     self.resdir = pathjoin(path, ".git", "results")
     self.mldir = git_path
     self.project_conf = project.codeface_conf
     self.no_report = False
     self.loglevel = "devinfo"
     self.logfile = pathjoin(path, ".git", "log")
     self.recreate = False
     # This config_file is added in the codeface test command handler
     self.codeface_conf = self.config_file
     conf = Configuration.load(self.codeface_conf, self.project_conf)
     dbm = DBManager(conf)
     # start from empty tables
     for table in pid_tables + other_tables:
         dbm.doExecCommit("DELETE FROM {}".format(table))
def run_extraction(conf, resdir, extract_commit_messages, extract_impl,
                   extract_on_range_level):
    """
    Run all project-level extractions and, if enabled, all range-level
    extractions for every pair of consecutive revisions.

    If the configuration contains a list of revisions, it has to consist
    of the same revisions as the list stored in the database; otherwise
    an error is logged and the process exits with status 1. If the
    configuration contains no revisions, the list from the database is
    used.

    :param conf: the Codeface configuration object
    :param resdir: the Codeface results dir, where output files are written
    :param extract_commit_messages: passed on to extractions.get_extractions
    :param extract_impl: passed on to extractions.get_extractions
    :param extract_on_range_level: passed on to extractions.get_extractions;
        the range-level extractions are only run if this is true
    """

    log.info("%s: Extracting data" % conf["project"])

    # one database manager is shared by all extractions
    dbm = DBManager(conf)

    # obtain both the project-level and the range-level extractions
    project_extractions, range_extractions = extractions.get_extractions(
        dbm, conf, resdir, csv_writer, extract_commit_messages, extract_impl,
        extract_on_range_level)

    for project_extraction in project_extractions:
        project_extraction.run()

    # range-level extractions are run only if explicitly enabled
    if extract_on_range_level:

        # compare the revisions of the config file with those in the database
        revs = conf["revisions"]
        stored_revs = extractions.RevisionExtraction(
            dbm, conf, resdir, csv_writer).get_list()

        if not revs:
            log.info(
                "No list of revisions found in configuration file, using the list from the DB instead!"
            )
            # fall back to the revisions as stored in the database
            revs = stored_revs
        elif set(revs) == set(stored_revs):
            log.info("List of revisions in configuration file and DB match.")
        else:
            log.error(
                "List of revisions in configuration file do not match the list stored in the DB! Stopping now."
            )
            sys.exit(1)

        # each range spans two consecutive revisions; ranges count from 1
        for range_number, (start_rev, end_rev) in enumerate(
                zip(revs, revs[1:]), 1):
            log.info("%s: Extracting data for range %s [version '%s']" %
                     (conf["project"], range_number, end_rev))

            for range_extraction in range_extractions:
                range_extraction.run(range_number, start_rev, end_rev)

    log.info("Extraction complete!")
Example #5
0
 def checkEdges(self):
     """
     Check the edge lists stored in the database against the known
     correct edges given by self.correct_edges.

     self.correct_edges is either a list of edge lists, one per release
     range in the order of dbm.get_release_ranges(), or a single flat
     edge list, in which case only the first release range is checked.
     An edge is a list of two developer names followed by the third
     entry of the corresponding database edge.

     For every checked range, the number of database edges that occur
     among the correct edges has to equal the number of correct edges.
     """
     conf = Configuration.load(self.codeface_conf, self.project_conf)
     dbm = DBManager(conf)
     project_id = dbm.getProjectID(conf["project"], self.tagging)
     persons = dbm.get_project_persons(project_id)
     # Create map from id to name
     person_map = {person[0]: person[1] for person in persons}

     given_correct_edges = self.correct_edges
     # In a flat edge list, the first entry of the first edge is a name,
     # i.e., a string. This requires isinstance(): an identity comparison
     # with the type object ``str`` is never true for a string value.
     # type(u"") additionally covers unicode strings on Python 2.
     if isinstance(given_correct_edges[0][0], (str, type(u""))):
         # simply check the first range
         given_correct_edges = [given_correct_edges]

     release_ranges = dbm.get_release_ranges(project_id)
     for i, correct_edges in enumerate(given_correct_edges):
         cluster_id = dbm.get_cluster_id(project_id, release_ranges[i])
         edgelist = dbm.get_edgelist(cluster_id)
         # Create edge list with developer names
         test_edges = [[person_map[edge[0]], person_map[edge[1]], edge[2]]
                       for edge in edgelist]
         # Count the database edges that are among the known correct edges
         match_count = sum(1 for test_edge in test_edges
                           if test_edge in correct_edges)
         self.assertTrue(
             match_count == len(correct_edges),
             msg="Project edgelist is incorrect for the v{}_release "
             "to v{}_release analysis!".format(i, i + 1))
Example #6
0
 def checkEdges(self):
     """
     Check the edge lists stored in the database against the known
     correct edges given by self.correct_edges.

     self.correct_edges is either a list of edge lists, one per release
     range in the order of dbm.get_release_ranges(), or a single flat
     edge list, in which case only the first release range is checked.
     An edge is a list of two developer names followed by the third
     entry of the corresponding database edge.

     For every checked range, the number of database edges that occur
     among the correct edges has to equal the number of correct edges.
     """
     conf = Configuration.load(self.codeface_conf, self.project_conf)
     dbm = DBManager(conf)
     project_id = dbm.getProjectID(conf["project"], self.tagging)
     persons = dbm.get_project_persons(project_id)
     # Create map from id to name
     person_map = {person[0]: person[1] for person in persons}

     given_correct_edges = self.correct_edges
     # A flat edge list is recognized by the first entry of its first
     # edge being a name, i.e., a string. isinstance() is needed for
     # that: comparing the value with the type object ``str`` by identity
     # is never true. type(u"") also accepts unicode names on Python 2.
     first_entry = given_correct_edges[0][0]
     if isinstance(first_entry, (str, type(u""))):
         # simply check the first range
         given_correct_edges = [given_correct_edges]

     release_ranges = dbm.get_release_ranges(project_id)
     for i, correct_edges in enumerate(given_correct_edges):
         release_range = release_ranges[i]
         cluster_id = dbm.get_cluster_id(project_id, release_range)
         edgelist = dbm.get_edgelist(cluster_id)
         # Create edge list with developer names
         test_edges = [[person_map[edge[0]], person_map[edge[1]], edge[2]]
                       for edge in edgelist]
         # Count the database edges that are among the known correct edges
         matching_edges = [test_edge for test_edge in test_edges
                           if test_edge in correct_edges]
         res = (len(matching_edges) == len(correct_edges))
         self.assertTrue(
             res,
             msg="Project edgelist is incorrect for the v{}_release "
                 "to v{}_release analysis!"
             .format(i, i + 1))
 def checkClean(self):
     """
     Delete the project from the database and check that none of its
     data is left behind.

     After the project row has been deleted, dbm.doExec() has to return
     0 for a select restricted to the project id on every table in
     pid_tables, and for an unrestricted select on every table in
     other_tables.
     """
     conf = Configuration.load(self.codeface_conf, self.project_conf)
     dbm = DBManager(conf)
     project_id = dbm.getProjectID(conf["project"], self.tagging)
     dbm.doExecCommit("DELETE FROM project WHERE id={}".format(project_id))

     def assert_nothing_selected(table_name, query):
         self.assertEqual(dbm.doExec(query), 0,
                          msg="Table '{}' still dirty!".format(table_name))

     # tables whose rows refer to a project id
     for table_name in pid_tables:
         assert_nothing_selected(
             table_name,
             "SELECT * FROM {table} WHERE projectId={pid}".format(
                 table=table_name, pid=project_id))

     # tables that are checked as a whole
     for table_name in other_tables:
         assert_nothing_selected(
             table_name, "SELECT * FROM {table}".format(table=table_name))
Example #8
0
    def check_commit_dependency(self, commit_dependency_data):
        '''
        Verifies the commit_dependency table against the expected data
        that the unit test provides in self.commit_dependency.

        The check is skipped if self.commit_dependency is None. Otherwise,
        each expected entry must be contained in the actual rows, where
        the impl is only compared if the expected impl is not None, and
        there must be as many actual rows as expected entries.

        :param commit_dependency_data:
        The data of the actual table:
        | id  | commitId | file | entityId | entityType | size | impl |
        :return:
        '''
        expectation = self.commit_dependency
        if expectation is None:
            return

        conf = Configuration.load(self.codeface_conf, self.project_conf)
        dbm = DBManager(conf)
        project_id = dbm.getProjectID(conf["project"], self.tagging)

        # drop the "id" column of the actual rows, so that
        # (commit_id, file, entityId, type, size, impl) tuples remain
        actual = [(row[1], row[2], row[3], row[4], row[5], row[6])
                  for row in commit_dependency_data]
        actual_without_impl = [row[0:5] for row in actual]

        # look up the commit id for the commit hash of each expected entry
        expected = []
        for entry in expectation:
            commit_id = dbm.getCommitId(project_id, entry[0])
            expected.append(
                (commit_id, entry[1], entry[2], entry[3], entry[4], entry[5]))

        for expected_row in expected:
            impl_is_checked = expected_row[5] is not None
            if impl_is_checked:
                self.assertIn(expected_row, actual)
            else:
                # don't check the impl
                self.assertIn(expected_row[0:5], actual_without_impl)

        self.assertEqual(len(actual), len(expected))
Example #9
0
 def getResults(self):
     """
     Collect the contents of every table listed in self.result_tables.

     The project id obtained from the database is asserted to be greater
     than or equal to 0 before any table is read.

     :return: dict with one entry per table name, holding the value of
         dbm.doFetchAll() after "SELECT *" was run on that table
     """
     conf = Configuration.load(self.codeface_conf, self.project_conf)
     dbm = DBManager(conf)
     project_id = dbm.getProjectID(conf["project"], self.tagging)
     self.assertGreaterEqual(project_id, 0)

     def select_all(table_name):
         query = "SELECT * FROM {table}".format(table=table_name)
         dbm.doExec(query)
         return dbm.doFetchAll()

     return {table_name: select_all(table_name)
             for table_name in self.result_tables}
Example #10
0
 def checkClean(self):
     """
     Remove the project from the database and make sure that no data
     remains for it.

     Once the project row is deleted, every table in pid_tables is
     queried for rows with the project's id and every table in
     other_tables is queried for any rows; dbm.doExec() has to return 0
     for each of these queries.
     """
     conf = Configuration.load(self.codeface_conf, self.project_conf)
     dbm = DBManager(conf)
     project_id = dbm.getProjectID(conf["project"], self.tagging)
     dbm.doExecCommit("DELETE FROM project WHERE id={}".format(project_id))

     def expect_clean(table_name, query):
         res = dbm.doExec(query)
         self.assertEqual(res, 0,
                          msg="Table '{}' still dirty!".format(table_name))

     # project-specific tables: only rows of this project count
     for table_name in pid_tables:
         expect_clean(
             table_name,
             "SELECT * FROM {table} WHERE projectId={pid}".format(
                 table=table_name, pid=project_id))

     # remaining tables: any row counts
     for table_name in other_tables:
         expect_clean(table_name,
                      "SELECT * FROM {table}".format(table=table_name))
 def checkEdges(self):
     """
     Check the project's edge list stored in the database against the
     known correct edges in self.correct_edges.

     The database edges are translated to lists of two developer names
     followed by the third entry of the database edge. The number of
     these edges that occur in self.correct_edges has to equal the number
     of correct edges.
     """
     conf = Configuration.load(self.codeface_conf, self.project_conf)
     dbm = DBManager(conf)
     project_id = dbm.getProjectID(conf["project"], self.tagging)
     cluster_id = dbm.get_cluster_id(project_id)
     edgelist = dbm.get_edgelist(cluster_id)
     persons = dbm.get_project_persons(project_id)

     # Map person ids to names
     name_by_id = dict((person[0], person[1]) for person in persons)

     # Replace the ids at both ends of each edge by developer names
     named_edges = []
     for edge in edgelist:
         named_edges.append([name_by_id[edge[0]], name_by_id[edge[1]],
                             edge[2]])

     # Count the edges that are among the known correct edges
     expected_edges = self.correct_edges
     matches = [named_edge for named_edge in named_edges
                if named_edge in expected_edges]
     self.assertTrue(len(matches) == len(expected_edges),
                     msg="Project edgelist is incorrect!")
def insert_user_data(issues, conf):
    """Sync the users of all issues with the ID service and update the issue data.

    The author of each issue, the author of each of its events, and each
    non-empty reference target of an event are passed to the ID service.
    Their "email", "name", and "id" entries are overwritten with the
    person data read from the database.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :return: the updated issue data
    """

    log.info("Syncing users with ID service...")

    # users handled so far (key: the string passed to the ID service)
    known_users = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    def build_user_string(name, email):
        # without an e-mail address, the bare name is used
        if email:
            return "{name} <{email}>".format(name=name, email=email)
        return "{name}".format(name=name)

    def resolve_user(user):
        # the username stands in for a missing name
        if user["name"] is None:
            raw_name = user["username"]
        else:
            raw_name = user["name"]
        # fix encoding for name and e-mail address
        name = unicode(raw_name).encode("utf-8")
        mail = unicode(user["email"]).encode("utf-8")
        user_string = build_user_string(name, mail)

        # users seen before are served from the buffer to save DB queries
        if user_string in known_users:
            log.devinfo("Returning user '{}' from buffer.".format(user_string))
            return known_users[user_string]

        # get person information from ID service
        log.devinfo("Passing user '{}' to ID service.".format(user_string))
        person_id = idservice.getPersonID(user_string)

        # overwrite the user data with the person information from the DB
        person = idservice.getPersonFromDB(person_id)
        user["email"] = person["email1"]  # column 'email1'
        user["name"] = person["name"]  # column 'name'
        user["id"] = person["id"]  # column 'id'

        # the buffer key stays the string built from the original data
        known_users[user_string] = user
        return user

    for issue in issues:
        # issue author
        issue["user"] = resolve_user(issue["user"])

        for event in issue["eventsList"]:
            # event author
            event["user"] = resolve_user(event["user"])

            # reference target, if the event has one
            ref_target = event["ref_target"]
            if ref_target != "":
                event["ref_target"] = resolve_user(ref_target)

    return issues
Example #13
0
 def clear_tables(self):
     """
     Delete all rows from every table listed in self.result_tables.
     """
     dbm = DBManager(
         Configuration.load(self.codeface_conf, self.project_conf))
     for table_name in self.result_tables:
         statement = "DELETE FROM {}".format(table_name)
         dbm.doExecCommit(statement)
Example #14
0
 def clear_tables(self):
     """
     Empty the result tables: one "DELETE FROM" statement is executed
     and committed for each table named in self.result_tables.
     """
     configuration = Configuration.load(self.codeface_conf,
                                        self.project_conf)
     dbm = DBManager(configuration)
     for table_name in self.result_tables:
         dbm.doExecCommit("DELETE FROM {}".format(table_name))
Example #15
0
def insert_user_data(issues, conf):
    """
    Sync all users occurring in the issues with the ID service and
    replace them by the user data stored in the database.

    The issues are processed in two passes. The first pass hands every
    user (issue authors, comment authors, event authors, and the target
    users of "assigned" events) to the ID service and stores the returned
    person id in place of the user. The second pass replaces every person
    id by a dict with the keys "email", "name", and "id" read from the
    database. For "assigned" events, "event_info_1" and "event_info_2"
    end up holding the name and the e-mail address of the target user.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :return: the updated issue data
    """

    log.info("Syncing users with ID service...")

    # person ids obtained from the ID service (key: user string)
    person_ids = dict()
    # user data read from the database (key: person id)
    users_by_id = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    def build_user_string(name, email):
        # without an e-mail address, the bare name is used
        if email:
            return "{name} <{email}>".format(name=name, email=email)
        return "{name}".format(name=name)

    def lookup_person_id(user):
        # the username stands in for a missing or empty name
        raw_name = user["name"]
        if raw_name is None or raw_name == "":
            raw_name = user["username"]
        # fix encoding for name and e-mail address
        name = unicode(raw_name).encode("utf-8")
        mail = unicode(user["email"]).encode("utf-8")
        user_string = build_user_string(name, mail)

        # only users not seen before are passed to the ID service
        if user_string not in person_ids:
            log.devinfo("Passing user '{}' to ID service.".format(user_string))
            person_ids[user_string] = idservice.getPersonID(user_string)
        else:
            log.devinfo(
                "Returning person id for user '{}' from buffer.".format(
                    user_string))

        return person_ids[user_string]

    def load_user(person_id):
        # users read before are served from the buffer to save DB queries
        if person_id in users_by_id:
            log.devinfo("Returning user '{}' from buffer.".format(person_id))
            return users_by_id[person_id]

        # get person information from ID service
        log.devinfo("Passing user id '{}' to ID service.".format(person_id))
        person = idservice.getPersonFromDB(person_id)
        user = {
            "email": person["email1"],  # column "email1"
            "name": person["name"],  # column "name"
            "id": person["id"],  # column "id"
        }

        users_by_id[person_id] = user
        return user

    # first pass: pass all occurring users to the ID service
    for issue in issues:
        issue["author"] = lookup_person_id(issue["author"])

        for comment in issue["comments"]:
            comment["author"] = lookup_person_id(comment["author"])

        for event in issue["history"]:
            event["author"] = lookup_person_id(event["author"])

            # "assigned" events additionally carry a target user
            if event["event"] == "assigned":
                target = create_user(event["event_info_1"], "",
                                     event["event_info_2"])
                event["event_info_1"] = lookup_person_id(target)

    # second pass: read all users once the database updates are done
    for issue in issues:
        issue["author"] = load_user(issue["author"])

        for comment in issue["comments"]:
            comment["author"] = load_user(comment["author"])

        for event in issue["history"]:
            event["author"] = load_user(event["author"])

            # event_info_1 holds the person id of the target user here
            if event["event"] == "assigned":
                target = load_user(event["event_info_1"])
                event["event_info_1"] = target["name"]
                event["event_info_2"] = target["email"]

    log.debug("number of issues after insert_user_data: '{}'".format(
        len(issues)))
    return issues
Example #16
0
def insert_user_data(issues, conf, resdir):
    """
    Sync all users occurring in the issues with the ID service, replace
    them by the user data stored in the database, and dump the
    username-to-user list to the file "usernames.list" in resdir.

    The issues are processed in two passes. The first pass hands every
    user (issue authors, event authors, and non-empty reference targets)
    to the ID service and stores the returned person id in place of the
    user. The second pass replaces every person id by a dict with the
    keys "email", "name", and "id" read from the database. For events
    with a reference target, "event_info_1" and "event_info_2" are set to
    the name and the e-mail address of the target.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :param resdir: the directory in which the username-to-user-list should be dumped
    :return: the updated issue data
    """

    log.info("Syncing users with ID service...")

    # person ids obtained from the ID service (key: user string)
    person_ids = dict()
    # person ids of all usernames seen (key: username)
    person_ids_by_username = dict()
    # user data read from the database (key: person id)
    users_by_id = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    def build_user_string(name, email):
        # without an e-mail address, the bare name is used
        if email:
            return "{name} <{email}>".format(name=name, email=email)
        return "{name}".format(name=name)

    def lookup_person_id(user):
        username = unicode(user["username"]).encode("utf-8")

        # fix encoding for name and e-mail address; the username stands in
        # for a missing name
        if user["name"] is None:
            name = username
        else:
            name = unicode(user["name"]).encode("utf-8")
        mail = unicode(user["email"]).encode("utf-8")
        user_string = build_user_string(name, mail)

        # only users not seen before are passed to the ID service
        if user_string in person_ids:
            log.devinfo(
                "Returning person id for user '{}' from buffer.".format(
                    user_string))
        else:
            log.devinfo("Passing user '{}' to ID service.".format(user_string))
            person_ids[user_string] = idservice.getPersonID(user_string)
        person_id = person_ids[user_string]

        # remember the id for the username dump
        # NOTE(review): username is the result of encode() and thus never
        # None here, so this condition always holds
        if username is not None:
            person_ids_by_username[username] = person_id

        return person_id

    def load_user(person_id):
        # users read before are served from the buffer to save DB queries
        if person_id in users_by_id:
            log.devinfo("Returning user '{}' from buffer.".format(person_id))
            return users_by_id[person_id]

        # get person information from ID service
        log.devinfo("Passing user id '{}' to ID service.".format(person_id))
        person = idservice.getPersonFromDB(person_id)
        user = {
            "email": person["email1"],  # column "email1"
            "name": person["name"],  # column "name"
            "id": person["id"],  # column "id"
        }

        users_by_id[person_id] = user
        return user

    # first pass: pass all occurring users to the ID service
    for issue in issues:
        issue["user"] = lookup_person_id(issue["user"])

        for event in issue["eventsList"]:
            event["user"] = lookup_person_id(event["user"])

            # reference target, if the event has one
            ref_target = event["ref_target"]
            if ref_target != "":
                event["ref_target"] = lookup_person_id(ref_target)

    # second pass: read all users once the database updates are done
    for issue in issues:
        issue["user"] = load_user(issue["user"])

        for event in issue["eventsList"]:
            event["user"] = load_user(event["user"])

            # ref_target holds the person id of the target user here
            if event["ref_target"] != "":
                target = load_user(event["ref_target"])
                event["ref_target"] = target
                event["event_info_1"] = target["name"]
                event["event_info_2"] = target["email"]

    # collect username, name, and e-mail for the dump
    lines = []
    for username, person_id in person_ids_by_username.items():
        user = load_user(person_id)
        lines.append((username, user["name"], user["email"]))

    log.info("Dump username list to file...")
    username_dump = os.path.join(resdir, "usernames.list")
    # duplicates are dropped and the lines are ordered by username
    unique_lines = sorted(set(lines), key=lambda line: line[0])
    csv_writer.write_to_csv(username_dump, unique_lines)

    return issues