class GitHubUtil():
    """
    This class helps mapping the identities of the users in the vcs and GitHub
    """

    def __init__(self, db_name, project_name, repo_name, github_repo_full_name, tokens, config, log_root_path):
        """
        :type db_name: str
        :param db_name: the name of an existing DB

        :type project_name: str
        :param project_name: the name of an existing project in the DB

        :type repo_name: str
        :param repo_name: the name of an existing repository in the DB

        :type github_repo_full_name: str
        :param github_repo_full_name: full name of the GitHub repository

        :type tokens: list str
        :param tokens: list of GitHub tokens (the first one is used initially)

        :type config: dict
        :param config: the DB configuration file (its 'database' entry is set to db_name in place)

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._log_path = log_root_path + "map-vcs-github-users-" + db_name + "-" + project_name + "-" + repo_name
        self._project_name = project_name
        self._db_name = db_name
        self._repo_name = repo_name
        self._tokens = tokens
        self._active_token = 0
        self._url = github_repo_full_name

        config.update({'database': db_name})
        self._config = config

        self._logging_util = LoggingUtil()
        self._logger = self._logging_util.get_logger(self._log_path)
        self._db_util = DbUtil()
        self._cnx = self._db_util.get_connection(self._config)
        self._git_dao = GitDao(self._config, self._logger)
        self._github_querier = GitHubQuerier(self._url, self._tokens[self._active_token], self._logger)

    def _change_token(self):
        # switches to the next token (round robin) when the active one is reported as not usable
        if len(self._tokens) <= 1:
            # nothing to rotate to
            return

        if self._github_querier._token_util._is_usuable(self._tokens[self._active_token]):
            return

        self._active_token = (self._active_token + 1) % len(self._tokens)
        self._github_querier = GitHubQuerier(self._url, self._tokens[self._active_token], self._logger)

    def _analyse_user(self, user, unmatched_user, sha):
        # links the unmatched vcs user to every DB user having the same name as the GitHub user
        if not user:
            self._logger.warning("GitHub user not found for commit " + sha)
            return

        user_name = self._github_querier.get_user_name(user)
        candidate_ids = self._db_util.select_all_user_ids_by_name(self._cnx, user_name, self._logger)

        for candidate_id in candidate_ids:
            try:
                user_id, alias_id = self._db_util._identify_user_and_alias(self._cnx, unmatched_user,
                                                                           candidate_id, self._logger)
                if user_id != alias_id:
                    self._db_util.insert_user_alias(self._cnx, user_id, alias_id, self._logger)
                    self._logger.info("user ids " + str(user_id) + " and " + str(alias_id) +
                                      " successfully matched")
            except Exception:
                # report the ids that were being compared: the identified pair may not exist yet
                # when the failure happens inside _identify_user_and_alias
                self._logger.error("user ids " + str(unmatched_user) + " and " + str(candidate_id) +
                                   " not matched", exc_info=True)
                continue

    def match(self):
        """
        matches GitHub and Git identities
        """
        self._fileHandler = None
        try:
            self._fileHandler = self._logging_util.get_file_handler(self._logger, self._log_path, "info")

            self._logger.info("GitHubUtil started")
            start_time = datetime.now()

            repo_id = self._git_dao.select_repo_id(self._repo_name)
            user_ids = self._git_dao.select_all_developer_ids(repo_id)
            alias_ids = self._db_util.select_all_aliased_user_ids(self._cnx, self._logger)
            unmatched_users = list(set(user_ids) - set(alias_ids))

            for unmatched_user in unmatched_users:
                github_user = None
                # a commit authored by the user is preferred, a committed one is the fallback
                sha = self._git_dao.select_sha_commit_by_user(unmatched_user, repo_id, match_on="author")
                if sha:
                    github_user = self._github_querier.get_author_by_commit(sha)
                else:
                    sha = self._git_dao.select_sha_commit_by_user(unmatched_user, repo_id, match_on="committer")
                    if sha:
                        github_user = self._github_querier.get_committer_by_commit(sha)

                if not sha:
                    self._logger.warning("No commits found for user " + str(unmatched_user))
                    continue

                self._analyse_user(github_user, unmatched_user, sha)

            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(end_time, start_time)
            self._logger.info("GitHubUtil finished after " + str(minutes_and_seconds[0]) +
                              " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs")
        except Exception:
            self._logger.error("GitHubUtil failed", exc_info=True)
        finally:
            # the file handler is released on failure too, so repeated runs do not pile up handlers
            if self._fileHandler is not None:
                self._logging_util.remove_file_handler_logger(self._logger, self._fileHandler)
            if self._git_dao:
                self._git_dao.close_connection()
            if self._cnx:
                self._db_util.close_connection(self._cnx)
class DbSchema():
    """
    This class initializes the DB schema
    """

    # table options shared by every CREATE TABLE statement issued by this class
    _TABLE_OPTIONS = ") ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;"

    def __init__(self, db_name, config, log_root_path):
        """
        :type db_name: str
        :param db_name: the name of the DB to initialize/connect to, it cannot be null and must follow the format
        allowed in MySQL (http://dev.mysql.com/doc/refman/5.7/en/identifiers.html).
        If a DB having a name equal already exists in Gitana, the existing DB will be dropped and a new one will be created

        :type config: dict
        :param config: the DB configuration file

        :type log_root_path: str
        :param log_root_path: the log path
        """
        self._db_name = db_name
        self._config = config
        self._log_root_path = log_root_path
        self._db_util = DbUtil()
        self._logging_util = LoggingUtil()

        log_path = self._log_root_path + "db-schema" + db_name
        self._logger = self._logging_util.get_logger(log_path)
        self._fileHandler = self._logging_util.get_file_handler(self._logger, log_path, "info")
        self._cnx = self._db_util.get_connection(self._config)

    def __del__(self):
        # getattr is used because __del__ also runs when __init__ failed half-way
        cnx = getattr(self, "_cnx", None)
        if cnx:
            self._db_util.close_connection(cnx)

        logger = getattr(self, "_logger", None)
        if logger:
            # deletes the file handler of the logger
            self._logging_util.remove_file_handler_logger(logger, getattr(self, "_fileHandler", None))

    def add_git_tables(self):
        """
        initializes git tables if they do not exist
        """
        self.set_database(self._db_name)
        self._init_git_tables()

    def add_issue_tracker_tables(self):
        """
        initializes issue tracker tables if they do not exist
        """
        self.set_database(self._db_name)
        self._init_shared_tables_issue_tracker_communication_channels()
        self._init_issue_tracker_tables()

    def add_instant_messaging_tables(self):
        """
        initializes instant messaging tables if they do not exist
        """
        self.set_database(self._db_name)
        self._init_shared_tables_issue_tracker_communication_channels()
        self._init_instant_messaging_tables()

    def add_forum_tables(self):
        """
        initializes forum tables if they do not exist
        """
        self.set_database(self._db_name)
        self._init_shared_tables_issue_tracker_communication_channels()
        self._init_forum_tables()

    def init_database(self, init_git, init_issue_tracker, init_forum, init_instant_messaging):
        """
        initializes the database tables and functions

        :type init_git: bool
        :param init_git: if True, it initializes the tables containing git data

        :type init_issue_tracker: bool
        :param init_issue_tracker: if True, it initializes the tables containing issue tracker data

        :type init_forum: bool
        :param init_forum: if True, it initializes the tables containing forum data

        :type init_instant_messaging: bool
        :param init_instant_messaging: if True, it initializes the tables containing instant messaging data
        """
        try:
            self._logger.info("init database started")
            start_time = datetime.now()

            self._create_database()
            self.set_database(self._db_name)
            self._set_settings()
            self._init_common_tables()

            if init_issue_tracker or init_forum or init_instant_messaging:
                self._init_shared_tables_issue_tracker_communication_channels()
            if init_git:
                self._init_git_tables()
            if init_issue_tracker:
                self._init_issue_tracker_tables()
            if init_forum:
                self._init_forum_tables()
            if init_instant_messaging:
                self._init_instant_messaging_tables()

            self._init_functions()
            self._logger.info("database " + self._db_name + " created")

            end_time = datetime.now()
            minutes_and_seconds = self._logging_util.calculate_execution_time(end_time, start_time)
            self._logger.info("Init database finished after " + str(minutes_and_seconds[0]) +
                              " minutes and " + str(round(minutes_and_seconds[1], 1)) + " secs")
        except Exception:
            self._logger.error("init database failed", exc_info=True)

    def create_project(self, project_name):
        """
        inserts a project in the DB

        :type project_name: str
        :param project_name: the name of the project to create
        """
        self._cnx = self._db_util.get_connection(self._config)
        try:
            self._db_util.insert_project(self._cnx, self._db_name, project_name)
        finally:
            self._db_util.close_connection(self._cnx)

    def create_repository(self, project_name, repo_name):
        """
        inserts a repository in the DB

        :type project_name: str
        :param project_name: the name of an existing project

        :type repo_name: str
        :param repo_name: the name of the repository to insert
        """
        self._cnx = self._db_util.get_connection(self._config)
        try:
            self.set_database(self._db_name)
            project_id = self._db_util.select_project_id(self._cnx, project_name, self._logger)
            try:
                self._db_util.insert_repo(self._cnx, project_id, repo_name, self._logger)
            except Exception:
                self._logger.error("repository " + repo_name + " not inserted", exc_info=True)
        finally:
            self._db_util.close_connection(self._cnx)

    def match_user_identity(self, source_user_name, source_user_email, target_user_name, target_user_email):
        """
        matches the identity of a user to another one

        :type source_user_name: str
        :param source_user_name: the name of the source user

        :type source_user_email: str
        :param source_user_email: the email of the source user

        :type target_user_name: str
        :param target_user_name: the name of the target user

        :type target_user_email: str
        :param target_user_email: the email of the target user
        """
        self._cnx = self._db_util.get_connection(self._config)
        try:
            self.set_database(self._db_name)

            source_user_id = self._select_user_id(source_user_name, source_user_email)
            target_user_id = self._select_user_id(target_user_name, target_user_email)

            if source_user_id and target_user_id:
                try:
                    user_id, alias_id = self._db_util._identify_user_and_alias(self._cnx, source_user_id,
                                                                               target_user_id, self._logger)
                    if user_id != alias_id:
                        self._db_util.insert_user_alias(self._cnx, user_id, alias_id, self._logger)
                        self._logger.info("user ids " + str(user_id) + " and " + str(alias_id) +
                                          " successfully matched")
                except Exception:
                    self._logger.error("user ids " + str(source_user_id) + " and " + str(target_user_id) +
                                       " not matched", exc_info=True)
            else:
                self._logger.warning("source or target user not found in the DB")
        finally:
            self._db_util.close_connection(self._cnx)

    def _select_user_id(self, user_name, user_email):
        # looks a user id up by name and email, falling back to whichever of the two is provided
        if user_name and user_email:
            return self._db_util.select_user_id_by_name_and_email(self._cnx, user_name, user_email, self._logger)
        if user_name:
            return self._db_util.select_user_id_by_name(self._cnx, user_name, self._logger)
        return self._db_util.select_user_id_by_email(self._cnx, user_email, self._logger)

    def list_projects(self):
        """
        lists all projects contained in the DB
        """
        # NOTE: unlike the other public helpers, the connection stays open after this call;
        # __del__ releases it
        self._cnx = self._db_util.get_connection(self._config)
        self.set_database(self._db_name)

        cursor = self._cnx.cursor()
        try:
            cursor.execute("SELECT name FROM project")
            return [row[0] for row in cursor.fetchall()]
        finally:
            cursor.close()

    def set_database(self, db_name):
        """
        sets the DB used by the tool

        :type db_name: str
        :param db_name: the name of the DB
        """
        try:
            self._logger.info("set database " + db_name + " started")
            self._db_util.set_database(self._cnx, db_name)
            self._logger.info("set database " + db_name + " finished")
        except Exception:
            self._logger.error("set database failed", exc_info=True)

    def _set_settings(self):
        # sets the settings (max connections, charset, file format, ...) used by the DB
        self._db_util.set_settings(self._cnx)

    def _execute_statements(self, statements):
        # runs the given SQL statements in order on one cursor, which is always closed afterwards
        cursor = self._cnx.cursor()
        try:
            for statement in statements:
                cursor.execute(statement)
        finally:
            cursor.close()

    def _create_database(self):
        # creates the database, dropping an existing one with the same name first
        # NOTE(review): the DB name is concatenated into the statement unquoted; it has to be a
        # trusted, valid MySQL identifier
        self._execute_statements([
            "DROP DATABASE IF EXISTS " + self._db_name,
            "CREATE DATABASE " + self._db_name,
        ])

    def _init_functions(self):
        # initializes functions
        # the '--' comment inside the routine runs to the end of its line, so the SQL layout matters
        levenshtein_distance = """
            CREATE DEFINER=`root`@`localhost` FUNCTION `levenshtein_distance`(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8) RETURNS int(11)
            DETERMINISTIC
            BEGIN
                DECLARE s1_len, s2_len, i, j, c, c_temp, cost INT;
                DECLARE s1_char CHAR CHARACTER SET utf8;
                -- max strlen=255 for this function
                DECLARE cv0, cv1 VARBINARY(256);

                SET s1_len = CHAR_LENGTH(s1),
                    s2_len = CHAR_LENGTH(s2),
                    cv1 = 0x00,
                    j = 1,
                    i = 1,
                    c = 0;

                IF (s1 = s2) THEN
                    RETURN (0);
                ELSEIF (s1_len = 0) THEN
                    RETURN (s2_len);
                ELSEIF (s2_len = 0) THEN
                    RETURN (s1_len);
                END IF;

                WHILE (j <= s2_len) DO
                    SET cv1 = CONCAT(cv1, CHAR(j)),
                        j = j + 1;
                END WHILE;

                WHILE (i <= s1_len) DO
                    SET s1_char = SUBSTRING(s1, i, 1),
                        c = i,
                        cv0 = CHAR(i),
                        j = 1;

                    WHILE (j <= s2_len) DO
                        SET c = c + 1,
                            cost = IF(s1_char = SUBSTRING(s2, j, 1), 0, 1);

                        SET c_temp = ORD(SUBSTRING(cv1, j, 1)) + cost;
                        IF (c > c_temp) THEN
                            SET c = c_temp;
                        END IF;

                        SET c_temp = ORD(SUBSTRING(cv1, j+1, 1)) + 1;
                        IF (c > c_temp) THEN
                            SET c = c_temp;
                        END IF;

                        SET cv0 = CONCAT(cv0, CHAR(c)),
                            j = j + 1;
                    END WHILE;

                    SET cv1 = cv0,
                        i = i + 1;
                END WHILE;

                RETURN (c);
            END"""

        soundex_match = """
            CREATE DEFINER=`root`@`localhost` FUNCTION `soundex_match`(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8) RETURNS int(1)
            DETERMINISTIC
            BEGIN
                DECLARE _result INT DEFAULT 0;
                IF SOUNDEX(s1) = SOUNDEX(s2) THEN
                    SET _result = 1;
                END IF;
                RETURN _result;
            END"""

        self._execute_statements([levenshtein_distance, soundex_match])

    def _init_common_tables(self):
        # initializes common tables used by tables modeling git, issue tracker, forum and instant messaging data
        create_table_project = ("CREATE TABLE IF NOT EXISTS project( "
                                "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                "name varchar(255), "
                                "CONSTRAINT name UNIQUE (name)"
                                + self._TABLE_OPTIONS)

        create_table_user = ("CREATE TABLE IF NOT EXISTS user ( "
                             "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                             "name varchar(256), "
                             "email varchar(256), "
                             "CONSTRAINT namem UNIQUE (name, email) "
                             + self._TABLE_OPTIONS)

        create_table_user_alias = ("CREATE TABLE IF NOT EXISTS user_alias ( "
                                   "user_id int(20), "
                                   "alias_id int(20), "
                                   "PRIMARY KEY st (user_id, alias_id), "
                                   "CONSTRAINT u UNIQUE (user_id) "
                                   + self._TABLE_OPTIONS)

        self._execute_statements([
            create_table_project,
            create_table_user,
            create_table_user_alias,
        ])

    def _init_shared_tables_issue_tracker_communication_channels(self):
        # initializes shared tables used by tables modeling issue tracker, forum and instant messaging data
        create_table_label = ("CREATE TABLE IF NOT EXISTS label ( "
                              "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                              "name varchar(256), "
                              "CONSTRAINT name UNIQUE (name) "
                              + self._TABLE_OPTIONS)

        create_table_message = ("CREATE TABLE IF NOT EXISTS message ( "
                                "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                "own_id varchar(20), "
                                "pos int(10), "
                                "type_id int(20), "
                                "issue_id int(20), "
                                "topic_id int(20), "
                                "channel_id int(20), "
                                "pull_request_id int(20), "
                                "body longblob, "
                                "votes int(20), "
                                "author_id int(20), "
                                "created_at timestamp NULL DEFAULT NULL,"
                                "CONSTRAINT ip UNIQUE (issue_id, topic_id, channel_id, pull_request_id, own_id) "
                                + self._TABLE_OPTIONS)

        create_table_message_dependency = ("CREATE TABLE IF NOT EXISTS message_dependency ( "
                                           "source_message_id int(20), "
                                           "target_message_id int(20), "
                                           "PRIMARY KEY st (source_message_id, target_message_id) "
                                           + self._TABLE_OPTIONS)

        create_table_message_type = ("CREATE TABLE IF NOT EXISTS message_type ( "
                                     "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                     "name varchar(255), "
                                     "CONSTRAINT name UNIQUE (name) "
                                     + self._TABLE_OPTIONS)

        insert_message_types = ("INSERT IGNORE INTO message_type VALUES (NULL, 'question'), "
                                "(NULL, 'answer'), "
                                "(NULL, 'comment'), "
                                "(NULL, 'accepted_answer'), "
                                "(NULL, 'reply'), "
                                "(NULL, 'file_upload'), "
                                "(NULL, 'info');")

        create_table_attachment = ("CREATE TABLE IF NOT EXISTS attachment ( "
                                   "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                   "own_id varchar(20), "
                                   "message_id int(20), "
                                   "name varchar(256), "
                                   "extension varchar(10), "
                                   "bytes int(20), "
                                   "url varchar(512), "
                                   "CONSTRAINT ip UNIQUE (message_id, own_id) "
                                   + self._TABLE_OPTIONS)

        self._execute_statements([
            create_table_label,
            create_table_message,
            create_table_message_dependency,
            create_table_message_type,
            insert_message_types,
            create_table_attachment,
        ])

    def _init_git_tables(self):
        # initializes tables used to model git data
        create_table_repository = ("CREATE TABLE IF NOT EXISTS repository( "
                                   "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                   "project_id int(20), "
                                   "name varchar(255), "
                                   "CONSTRAINT name UNIQUE (name)"
                                   + self._TABLE_OPTIONS)

        create_table_reference = ("CREATE TABLE IF NOT EXISTS reference( "
                                  "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                  "repo_id int(20), "
                                  "name varchar(255), "
                                  "type varchar(255), "
                                  "CONSTRAINT name UNIQUE (repo_id, name, type) "
                                  + self._TABLE_OPTIONS)

        create_table_commit = ("CREATE TABLE IF NOT EXISTS commit("
                               "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                               "repo_id int(20), "
                               "sha varchar(512), "
                               "message varchar(512), "
                               "author_id int(20), "
                               "committer_id int(20), "
                               "authored_date timestamp NULL DEFAULT NULL, "
                               "committed_date timestamp NULL DEFAULT NULL, "
                               "size int(20), "
                               "INDEX sha (sha), "
                               "INDEX auth (author_id), "
                               "INDEX comm (committer_id), "
                               "CONSTRAINT s UNIQUE (sha, repo_id) "
                               + self._TABLE_OPTIONS)

        create_table_commit_parent = ("CREATE TABLE IF NOT EXISTS commit_parent("
                                      "repo_id int(20), "
                                      "commit_id int(20), "
                                      "commit_sha varchar(512), "
                                      "parent_id int(20), "
                                      "parent_sha varchar(512), "
                                      "PRIMARY KEY copa (repo_id, commit_id, parent_id), "
                                      "CONSTRAINT cshapsha UNIQUE (repo_id, commit_id, parent_sha) "
                                      + self._TABLE_OPTIONS)

        create_table_commits2reference = ("CREATE TABLE IF NOT EXISTS commit_in_reference("
                                          "repo_id int(20), "
                                          "commit_id int(20), "
                                          "ref_id int(20), "
                                          "PRIMARY KEY core (commit_id, ref_id) "
                                          + self._TABLE_OPTIONS)

        create_table_file = ("CREATE TABLE IF NOT EXISTS file( "
                             "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                             "repo_id int(20), "
                             "name varchar(512), "
                             "ext varchar(255), "
                             "CONSTRAINT rerena UNIQUE (repo_id, name) "
                             + self._TABLE_OPTIONS)

        create_table_file_renamed = ("CREATE TABLE IF NOT EXISTS file_renamed ( "
                                     "repo_id int(20), "
                                     "current_file_id int(20), "
                                     "previous_file_id int(20), "
                                     "file_modification_id int(20), "
                                     "PRIMARY KEY cpc (current_file_id, previous_file_id, file_modification_id) "
                                     + self._TABLE_OPTIONS)

        create_table_file_modification = ("CREATE TABLE IF NOT EXISTS file_modification ( "
                                          "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                          "commit_id int(20), "
                                          "file_id int(20), "
                                          "status varchar(10), "
                                          "additions numeric(10), "
                                          "deletions numeric(10), "
                                          "changes numeric(10), "
                                          "patch longblob, "
                                          "CONSTRAINT cf UNIQUE (commit_id, file_id) "
                                          + self._TABLE_OPTIONS)

        create_table_line_detail = ("CREATE TABLE IF NOT EXISTS line_detail( "
                                    "file_modification_id int(20),"
                                    "type varchar(25), "
                                    "line_number numeric(20), "
                                    "is_commented numeric(1), "
                                    "is_partially_commented numeric(1), "
                                    "is_empty numeric(1), "
                                    "content longblob, "
                                    "PRIMARY KEY fityli (file_modification_id, type, line_number) "
                                    + self._TABLE_OPTIONS)

        # IF NOT EXISTS keeps add_git_tables() repeatable, like every other table here
        create_table_function = ("CREATE TABLE IF NOT EXISTS function ( "
                                 "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                 "name varchar(512), "
                                 "file_id int(20), "
                                 "args int(3), "
                                 "loc int(20), "
                                 "token_count int(20), "
                                 "total_lines int(20), "
                                 "ccn int(20), "
                                 "start_line int(10), "
                                 "end_line int(10), "
                                 "CONSTRAINT name UNIQUE (name, file_id, start_line, end_line)"
                                 + self._TABLE_OPTIONS)

        create_table_function_at_commit = ("CREATE TABLE IF NOT EXISTS function_at_commit ( "
                                           "commit_id int(20),"
                                           "function_id int(20), "
                                           "PRIMARY KEY cf (commit_id, function_id) "
                                           + self._TABLE_OPTIONS)

        create_table_code_at_commit = ("CREATE TABLE IF NOT EXISTS code_at_commit ( "
                                       "commit_id int(20),"
                                       "file_id int(20), "
                                       "ccn int(10), "
                                       "loc int(10), "
                                       "commented_lines int(10), "
                                       "blank_lines int(10), "
                                       "funs int(10), "
                                       "tokens int(10), "
                                       "avg_ccn DECIMAL(5,2), "
                                       "avg_loc DECIMAL(5,2), "
                                       "avg_tokens DECIMAL(5,2), "
                                       "PRIMARY KEY cf (commit_id, file_id) "
                                       + self._TABLE_OPTIONS)

        self._execute_statements([
            create_table_repository,
            create_table_reference,
            create_table_commit,
            create_table_commit_parent,
            create_table_commits2reference,
            create_table_file,
            create_table_file_renamed,
            create_table_file_modification,
            create_table_line_detail,
            create_table_function,
            create_table_code_at_commit,
            create_table_function_at_commit,
        ])

    def _init_issue_tracker_tables(self):
        # initializes tables used to model issue tracker data
        create_table_issue_tracker = ("CREATE TABLE IF NOT EXISTS issue_tracker ( "
                                      "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                      "repo_id int(20), "
                                      "name varchar(512), "
                                      "type varchar(512), "
                                      "CONSTRAINT name UNIQUE (name)"
                                      + self._TABLE_OPTIONS)

        create_table_issue = ("CREATE TABLE IF NOT EXISTS issue ( "
                              "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                              "own_id varchar(20), "
                              "issue_tracker_id int(20), "
                              "summary varchar(512), "
                              "component varchar(256), "
                              "version varchar(256), "
                              "hardware varchar(256), "
                              "priority varchar(256), "
                              "severity varchar(256), "
                              "reference_id int(20), "
                              "reporter_id int(20), "
                              "created_at timestamp NULL DEFAULT NULL, "
                              "last_change_at timestamp NULL DEFAULT NULL, "
                              "CONSTRAINT ioi UNIQUE (issue_tracker_id, own_id), "
                              "INDEX u (reporter_id), "
                              "INDEX r (reference_id) "
                              + self._TABLE_OPTIONS)

        create_table_pull_request = ("CREATE TABLE IF NOT EXISTS pull_request ( "
                                     "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                     "issue_id int(20), "
                                     "author_id int(20), "
                                     "state varchar(128), "
                                     "target_ref_id int(20), "
                                     "merged_at timestamp NULL DEFAULT NULL, "
                                     "merged_by int(20), "
                                     "UNIQUE u (issue_id) "
                                     + self._TABLE_OPTIONS)

        create_table_pull_request_commit = ("CREATE TABLE IF NOT EXISTS pull_request_commit ( "
                                            "pr_id int(20), "
                                            "commit_id int(20), "
                                            "proposed_commit_id int(20), "
                                            "PRIMARY KEY pcp (pr_id, commit_id, proposed_commit_id) "
                                            + self._TABLE_OPTIONS)

        create_table_proposed_commit = ("CREATE TABLE IF NOT EXISTS proposed_commit ( "
                                        "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                        "repo_id int(20), "
                                        "sha varchar(512), "
                                        "message varchar(512), "
                                        "author_id int(20), "
                                        "committer_id int(20), "
                                        "authored_date timestamp NULL DEFAULT NULL, "
                                        "committed_date timestamp NULL DEFAULT NULL, "
                                        "INDEX sha (sha), "
                                        "INDEX auth (author_id), "
                                        "INDEX comm (committer_id), "
                                        "CONSTRAINT s UNIQUE (sha, repo_id) "
                                        + self._TABLE_OPTIONS)

        create_table_proposed_file_modification = (
            "CREATE TABLE IF NOT EXISTS proposed_file_modification ( "
            "id int(20) AUTO_INCREMENT PRIMARY KEY, "
            "proposed_commit_id int(20), "
            "file_id int(20), "
            "proposed_file_id int(20), "
            "status varchar(10), "
            "additions numeric(10), "
            "deletions numeric(10), "
            "changes numeric(10), "
            "patch longblob, "
            "CONSTRAINT rerena UNIQUE (proposed_commit_id, file_id, proposed_file_id) "
            + self._TABLE_OPTIONS)

        create_table_proposed_file = ("CREATE TABLE IF NOT EXISTS proposed_file ( "
                                      "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                      "repo_id int(20), "
                                      "name varchar(512), "
                                      "ext varchar(255), "
                                      "CONSTRAINT rerena UNIQUE (repo_id, name) "
                                      + self._TABLE_OPTIONS)

        create_table_pull_request_review = ("CREATE TABLE IF NOT EXISTS pull_request_review ( "
                                            "message_id int(20), "
                                            "pr_id int(20), "
                                            "file_id int(20), "
                                            "proposed_file_id int(20), "
                                            "PRIMARY KEY st (message_id, pr_id, file_id, proposed_file_id) "
                                            + self._TABLE_OPTIONS)

        create_table_issue_assignee = ("CREATE TABLE IF NOT EXISTS issue_assignee ( "
                                       "issue_id int(20), "
                                       "assignee_id int(20), "
                                       "PRIMARY KEY il (issue_id, assignee_id) "
                                       + self._TABLE_OPTIONS)

        create_table_issue_subscriber = ("CREATE TABLE IF NOT EXISTS issue_subscriber ( "
                                         "issue_id int(20), "
                                         "subscriber_id int(20), "
                                         "PRIMARY KEY il (issue_id, subscriber_id) "
                                         + self._TABLE_OPTIONS)

        create_table_issue_event = ("CREATE TABLE IF NOT EXISTS issue_event ( "
                                    "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                    "issue_id int(20), "
                                    "event_type_id int(20), "
                                    "detail varchar(256), "
                                    "creator_id int(20), "
                                    "created_at timestamp NULL DEFAULT NULL, "
                                    "target_user_id int(20), "
                                    "CONSTRAINT iecc UNIQUE (issue_id, event_type_id, creator_id, created_at, detail) "
                                    + self._TABLE_OPTIONS)

        create_table_issue_event_type = ("CREATE TABLE IF NOT EXISTS issue_event_type ( "
                                         "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                         "name varchar(256), "
                                         "CONSTRAINT name UNIQUE (name) "
                                         + self._TABLE_OPTIONS)

        create_table_issue_labelled = ("CREATE TABLE IF NOT EXISTS issue_labelled ( "
                                       "issue_id int(20), "
                                       "label_id int(20), "
                                       "PRIMARY KEY il (issue_id, label_id) "
                                       + self._TABLE_OPTIONS)

        create_issue_commit_dependency = ("CREATE TABLE IF NOT EXISTS issue_commit_dependency ( "
                                          "issue_id int(20), "
                                          "commit_id int(20), "
                                          "PRIMARY KEY ict (issue_id, commit_id) "
                                          + self._TABLE_OPTIONS)

        create_table_issue_dependency = ("CREATE TABLE IF NOT EXISTS issue_dependency ( "
                                         "issue_source_id int(20), "
                                         "issue_target_id int(20), "
                                         "created_at timestamp NULL DEFAULT NULL, "
                                         "type_id int(20), "
                                         "PRIMARY KEY st (issue_source_id, issue_target_id, type_id) "
                                         + self._TABLE_OPTIONS)

        create_issue_dependency_type = ("CREATE TABLE IF NOT EXISTS issue_dependency_type ("
                                        "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                        "name varchar(256), "
                                        "CONSTRAINT name UNIQUE (name) "
                                        + self._TABLE_OPTIONS)

        insert_issue_dependency_type = ("INSERT IGNORE INTO issue_dependency_type VALUES (NULL, 'block'), "
                                        "(NULL, 'depends'), "
                                        "(NULL, 'related'), "
                                        "(NULL, 'duplicated');")

        self._execute_statements([
            create_table_issue_tracker,
            create_table_issue,
            create_table_issue_assignee,
            create_table_issue_subscriber,
            create_table_issue_event,
            create_table_issue_event_type,
            create_table_issue_labelled,
            create_issue_commit_dependency,
            create_table_issue_dependency,
            create_issue_dependency_type,
            insert_issue_dependency_type,
            create_table_pull_request,
            create_table_pull_request_commit,
            create_table_proposed_commit,
            create_table_proposed_file_modification,
            create_table_proposed_file,
            create_table_pull_request_review,
        ])

    def _init_forum_tables(self):
        # initializes tables used to model forum data
        create_table_forum = ("CREATE TABLE IF NOT EXISTS forum ( "
                              "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                              "project_id int(20), "
                              "name varchar(512), "
                              "type varchar(512), "
                              "CONSTRAINT name UNIQUE (name)"
                              + self._TABLE_OPTIONS)

        create_table_topic = ("CREATE TABLE IF NOT EXISTS topic ( "
                              "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                              "own_id varchar(20), "
                              "forum_id int(20), "
                              "name varchar(256), "
                              "votes int(10), "
                              "views int(10), "
                              "created_at timestamp NULL DEFAULT NULL, "
                              "last_change_at timestamp NULL DEFAULT NULL, "
                              "CONSTRAINT name UNIQUE (forum_id, own_id)"
                              + self._TABLE_OPTIONS)

        create_table_topic_labelled = ("CREATE TABLE IF NOT EXISTS topic_labelled ( "
                                       "topic_id int(20), "
                                       "label_id int(20), "
                                       "PRIMARY KEY il (topic_id, label_id) "
                                       + self._TABLE_OPTIONS)

        self._execute_statements([
            create_table_forum,
            create_table_topic,
            create_table_topic_labelled,
        ])

    def _init_instant_messaging_tables(self):
        # initializes tables used to model instant messaging data
        create_table_instant_messaging = ("CREATE TABLE IF NOT EXISTS instant_messaging ( "
                                          "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                          "project_id int(20), "
                                          "name varchar(512), "
                                          "type varchar(512), "
                                          "CONSTRAINT name UNIQUE (name)"
                                          + self._TABLE_OPTIONS)

        create_table_channel = ("CREATE TABLE IF NOT EXISTS channel ( "
                                "id int(20) AUTO_INCREMENT PRIMARY KEY, "
                                "own_id varchar(20), "
                                "instant_messaging_id int(20), "
                                "name varchar(256), "
                                "description varchar(512), "
                                "created_at timestamp NULL DEFAULT NULL, "
                                "last_change_at timestamp NULL DEFAULT NULL, "
                                "CONSTRAINT name UNIQUE (instant_messaging_id, own_id)"
                                + self._TABLE_OPTIONS)

        self._execute_statements([
            create_table_instant_messaging,
            create_table_channel,
        ])