def fillCommit_IssuesTable_Through_Commit(commits, projectName, subsystem, commit_rows):
    """Link commits to the issues that their commit messages mention.

    The issue IDs registered for ``subsystem`` in ``Subsystems_Issues`` are
    loaded, and every commit message is searched for `` #<number>`` or
    `` (#<number>`` directly following a word character, where ``<number>`` is
    the part of the issue ID after its last ``#``.  Every hit is written to
    ``commits_Issues`` and recorded as a type ``'10'`` row in ``Involvement``.

    Args:
        commits: Iterable of commit items; ``item['data']['message']`` and
            ``item['data']['commit']`` are read.
        projectName: Value stored in the ``ProjectID`` column.
        subsystem: Subsystem ID used to select the candidate issues.
        commit_rows: Rows of the ``commit`` table.  Index 0 is written as the
            commit ID, index 1 is compared with the commit hash and index 2 is
            written as the involvement time.
    """
    # NOTE(review): the connection is now really closed (the original wrote
    # ``connection.close`` without calling it). This assumes getDBConn() hands
    # out a fresh connection per call -- confirm.
    connection = getDBConn()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(
                "SELECT Distinct IssueID FROM Subsystems_Issues WHERE SubsystemID = %s",
                (subsystem,),
            )
            # Compile one pattern per issue up front instead of rebuilding two
            # patterns for every (commit, issue) pair.
            issue_patterns = []
            for issue_row in cursor.fetchall():
                issue_id = issue_row[0]
                issue_number = issue_id[issue_id.rfind('#') + 1:]
                # ``\(?`` folds the " #N" and " (#N" variants into one search.
                pattern = re.compile(r'\b \(?#' + re.escape(issue_number) + r'\b')
                issue_patterns.append((issue_id, pattern))

            # Index the commit rows by hash so each commit is looked up once.
            rows_by_hash = {}
            for commit_row in commit_rows:
                rows_by_hash.setdefault(commit_row[1], []).append(commit_row)

            link_sql = (
                "INSERT IGNORE INTO commits_Issues (CommitID,IssueID) VALUES (%s,%s) "
                "ON DUPLICATE KEY UPDATE CommitID = CommitID, IssueID = IssueID "
            )
            involvement_sql = (
                "INSERT INTO Involvement (ID, ProjectID,Type,SourceURL,CommitID,IssueID,Time) "
                "VALUES (default,%s,'10',%s,%s,%s,%s)"
            )

            for git_commit in commits:
                message = git_commit['data']['message']
                matching_rows = rows_by_hash.get(git_commit['data']['commit'], ())
                if not matching_rows:
                    continue
                for issue_id, pattern in issue_patterns:
                    if not pattern.search(message):
                        continue
                    for commit_row in matching_rows:
                        commit_id = str(commit_row[0])
                        cursor.execute(link_sql, (commit_id, issue_id))
                        print("I am inserting" + "commit:" + commit_id + "issue:" + issue_id)
                        cursor.execute(
                            involvement_sql,
                            (projectName, commit_row[1], commit_id, issue_id, str(commit_row[2])),
                        )
        finally:
            cursor.close()
        # Only reached when every statement succeeded.
        connection.commit()
    finally:
        connection.close()
def fillArtifacts_CommitsTable(commits, subsystem, projectName):
    """Link each commit to the artifacts (files) it touched.

    Artifact rows for ``subsystem`` and commit rows for ``projectName`` are
    loaded, and one ``artifacts_commits`` row is written for every file of
    every commit whose path equals index 1 of an artifact row.  Commits that
    have no row in ``commit`` are skipped; previously they were linked using
    the ID left over from the preceding commit (or raised ``NameError`` when
    the very first commit was unknown).

    Args:
        commits: Iterable of commit items; ``item['data']['commit']`` and
            ``item['data']['files']`` (dicts with a ``'file'`` key) are read.
        subsystem: Subsystem ID used to select the artifact rows.
        projectName: Project ID used to select the commit rows.

    Returns:
        The rows fetched from ``commit`` for ``projectName``.
    """
    # NOTE(review): the connection is now really closed (the original wrote
    # ``connection.close`` without calling it). This assumes getDBConn() hands
    # out a fresh connection per call -- confirm.
    connection = getDBConn()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute("SELECT * FROM artifact WHERE SubsystemId = %s", (subsystem,))
            artifact_rows = cursor.fetchall()

            cursor.execute("SELECT * FROM commit where ProjectId = %s", (projectName,))
            commit_rows = cursor.fetchall()

            # Later rows overwrite earlier ones, as the original linear scan did.
            commit_id_by_hash = {row[1]: row[0] for row in commit_rows}
            artifact_ids_by_url = {}
            for row in artifact_rows:
                artifact_ids_by_url.setdefault(row[1], []).append(row[0])

            link_sql = (
                "INSERT IGNORE INTO artifacts_commits (ArtifactID,CommitID) VALUES (%s,%s) "
                "ON DUPLICATE KEY UPDATE ArtifactID=ArtifactID,CommitID = CommitID "
            )

            for commit in commits:
                commit_hash = commit["data"]["commit"]
                if commit_hash not in commit_id_by_hash:
                    # No stored ID for this commit, so there is nothing to link.
                    continue
                commitID = commit_id_by_hash[commit_hash]
                for changed_file in commit["data"]["files"]:
                    # NOTE(review): the raw path is compared here, while
                    # fillArtifactTable stores paths with "'" and ","
                    # removed -- such files never match. Confirm intent.
                    for artifactID in artifact_ids_by_url.get(changed_file['file'], ()):
                        print("Inserting artifact" + str(artifactID))
                        print("Inserting Commit" + str(commitID))
                        cursor.execute(link_sql, (str(artifactID), str(commitID)))
        finally:
            cursor.close()
        # Only reached when every statement succeeded.
        connection.commit()
    finally:
        connection.close()
    return commit_rows
def fillInvolvementTable_Dev_CommitInvolvement(projectName, unique_developers):
    """Attribute every stored commit of a project to a developer.

    For each subsystem listed for ``projectName`` in ``folder-component.csv``
    the commits returned by ``getAllGitCommits`` are walked.  The author e-mail
    is mapped to a developer ID via ``unique_developers``; existing
    ``involvement`` rows for the commit get their ``UserID`` updated, otherwise
    a new type ``'10'`` row is inserted.

    Commits without a row in ``commit`` are skipped (previously they reused the
    ID of the preceding commit), and a subsystem that appears on several CSV
    rows is processed once instead of once per row.

    Args:
        projectName: Project ID; also the value matched against the CSV
            ``project`` column.
        unique_developers: Iterable of dicts with ``'Linked_Developer_Email'``
            and ``'Email'`` keys; it is iterated once per commit.
    """
    # NOTE(review): the connection is now really closed (the original wrote
    # ``connection.close`` without calling it). This assumes getDBConn() hands
    # out a fresh connection per call -- confirm.
    connection = getDBConn()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute("SELECT * FROM commit where ProjectId = %s", (projectName,))
            # Later rows overwrite earlier ones, as the original linear scan did.
            commit_id_by_hash = {row[1]: row[0] for row in cursor.fetchall()}

            folder_component = dict()
            totalSubsystem = []
            with open('folder-component.csv') as csvfile:
                for row in csv.DictReader(csvfile):
                    if row['project'] == projectName:
                        print(row['subsystem'], row['folder'])
                        folder_component[row['folder']] = row['subsystem']
                        totalSubsystem.append(row['subsystem'])

            select_sql = "SELECT * FROM involvement where ProjectId = %s AND CommitID = %s"
            update_sql = "UPDATE involvement SET UserID = %s WHERE ID = %s"
            insert_sql = (
                "INSERT INTO involvement (ID, ProjectID, UserID, Type, SourceURL, CommitID, Time) "
                "VALUES (default,%s,%s,'10',%s,%s,from_unixtime(%s))"
            )

            # dict.fromkeys removes repeated subsystems but keeps CSV order.
            for subsystem in dict.fromkeys(totalSubsystem):
                print("subsystem" + subsystem)
                subSystemURLs = [
                    folder for folder, owner in folder_component.items() if owner == subsystem
                ]
                # The first folder listed for the subsystem is used as its repository.
                repo_url = subSystemURLs[0] + ".git"
                repo_dir = "/tmp/" + subsystem + ".git"
                commits = getAllGitCommits(repo_url, repo_dir)

                for commit in commits:
                    commit_hash = commit["data"]["commit"]
                    if commit_hash not in commit_id_by_hash:
                        # Without a stored commit ID there is nothing to attribute.
                        continue
                    commitID = str(commit_id_by_hash[commit_hash])

                    # The author string is expected to look like "Name <email>".
                    nameAndEmail = commit["data"]["Author"]
                    email = nameAndEmail[nameAndEmail.find('<') + 1:nameAndEmail.find('>')]
                    developerID = email
                    for item in unique_developers:
                        if item['Linked_Developer_Email'] == email:
                            developerID = item['Email']

                    # Update the rows already recorded for this commit, or
                    # insert a fresh involvement row when there are none.
                    cursor.execute(select_sql, (projectName, commitID))
                    involvement_rows = cursor.fetchall()
                    if involvement_rows:
                        for inv_row in involvement_rows:
                            cursor.execute(update_sql, (developerID, inv_row[0]))
                    else:
                        cursor.execute(
                            insert_sql,
                            (projectName, developerID, commit_hash, commitID, commit["updated_on"]),
                        )
        finally:
            cursor.close()
        # Only reached when every statement succeeded.
        connection.commit()
    finally:
        connection.close()
def fillInvolvemenTable_Dev_IssueInvolvement(repo_owner, projectName, unique_developers):
    """Record who reported and who commented on every issue and pull request.

    For each subsystem listed for ``projectName`` in ``folder-component.csv``
    the items yielded by ``getAllGitIssues_pullRequests(...).fetch()`` are
    walked.  One type ``'5'`` ``involvement`` row is inserted for the reporter
    and one for every comment, keyed by ``<repo_owner>/<subsystem>#<number>``.

    A subsystem that appears on several CSV rows is processed once; previously
    it was processed once per row, inserting duplicate involvement rows.

    Args:
        repo_owner: Repository owner; forwarded to
            ``getAllGitIssues_pullRequests`` and used in the issue ID.
        projectName: Project ID; also the value matched against the CSV
            ``project`` column.
        unique_developers: Iterable of dicts with ``'Linked_Developer_Email'``
            and ``'Email'`` keys; it is iterated once per reporter/commenter.
    """

    def canonical_id(login):
        # Same scan as before: every matching entry replaces the current
        # value, and later entries are compared against the replaced value.
        for item in unique_developers:
            if item['Linked_Developer_Email'] == login:
                login = item['Email']
        return login

    # The date format is bound as a parameter so that its "%" characters never
    # take part in the driver's placeholder substitution.
    date_format = '%Y-%m-%dT%H:%i:%s'
    insert_sql = (
        "INSERT INTO involvement (ID, ProjectID, UserID, Type, SourceURL, IssueID, Time) "
        "VALUES (default,%s,%s,'5',%s,%s,STR_TO_DATE(%s,%s))"
    )

    # NOTE(review): the connection is now really closed (the original wrote
    # ``connection.close`` without calling it). This assumes getDBConn() hands
    # out a fresh connection per call -- confirm.
    connection = getDBConn()
    try:
        cursor = connection.cursor()
        try:
            totalSubsystem = []
            with open('folder-component.csv') as csvfile:
                for row in csv.DictReader(csvfile):
                    if row['project'] == projectName:
                        print(row['subsystem'], row['folder'])
                        totalSubsystem.append(row['subsystem'])

            # dict.fromkeys removes repeated subsystems but keeps CSV order.
            for subsystem in dict.fromkeys(totalSubsystem):
                print("subsystem" + subsystem)
                issue_prefix = repo_owner + '/' + subsystem + '#'
                issuesAndPullRequests = getAllGitIssues_pullRequests(repo_owner, subsystem)
                for pull_request_issues in issuesAndPullRequests.fetch():
                    data = pull_request_issues['data']
                    issue_ = issue_prefix + str(data['number'])
                    print(data['number'])

                    # The reporter is stored with the issue creation time.
                    developer_reporter = canonical_id(data['user_data']['login'])
                    # Drop the trailing "Z"; a value without one is kept whole.
                    creation_date = data['created_at'].partition("Z")[0]
                    cursor.execute(
                        insert_sql,
                        (projectName, developer_reporter, issue_, issue_, creation_date, date_format),
                    )

                    for comment in data['comments_data']:
                        commenter = canonical_id(comment['user_data']['login'])
                        comment_date = comment['created_at'].partition("Z")[0]
                        # NOTE(review): every comment dated March 25 has its
                        # time replaced by 14:54:51. The reason is not visible
                        # here (possibly a workaround for a timestamp the
                        # database rejected) -- confirm before changing.
                        if comment_date.startswith('-03-25T', 4):
                            comment_date = comment_date[0:11] + "14:54:51"
                        print(issue_)
                        cursor.execute(
                            insert_sql,
                            (projectName, commenter, issue_, issue_, comment_date, date_format),
                        )
        finally:
            cursor.close()
        # Only reached when every statement succeeded.
        connection.commit()
    finally:
        connection.close()
def fillCommitTable(commits, projectName, listOfNamesAndEmail):
    """Store every commit in the ``commit`` table and collect its author.

    Each commit is inserted (ignored when it already exists) and its JSON
    serialisation is then written to ``CommitJSONContent``.

    Args:
        commits: Iterable of commit items; ``item['data']['commit']``,
            ``item['data']['Author']`` and ``item['updated_on']`` are read and
            the whole item is serialised with ``json.dumps``.
        projectName: Value stored in the ``ProjectID`` column.
        listOfNamesAndEmail: List that is extended in place with one
            ``{'name': ..., 'email': ...}`` dict per commit.

    Raises:
        ValueError: If an author string contains no ``<``.
    """
    insert_sql = (
        "INSERT IGNORE INTO commit (CommitID,CommitURL, Date, ProjectID) "
        "VALUES (default,%s,from_unixtime(%s),%s) ON DUPLICATE KEY UPDATE CommitID=CommitID"
    )
    update_sql = "UPDATE commit SET CommitJSONContent = %s WHERE CommitURL = %s"

    # NOTE(review): the connection is now really closed (the original wrote
    # ``connection.close`` without calling it). This assumes getDBConn() hands
    # out a fresh connection per call -- confirm.
    connection = getDBConn()
    try:
        cursor = connection.cursor()
        try:
            for commit in commits:
                # The author string is expected to look like "Name <email>".
                nameAndEmail = commit["data"]["Author"]
                name = nameAndEmail[:nameAndEmail.index("<") - 1]
                email = nameAndEmail[nameAndEmail.find('<') + 1:nameAndEmail.find('>')]
                listOfNamesAndEmail.append({'name': name, 'email': email})

                commit_hash = commit["data"]["commit"]
                cursor.execute(insert_sql, (commit_hash, commit["updated_on"], projectName))
                cursor.execute(update_sql, (json.dumps(commit), commit_hash))
        finally:
            cursor.close()
        # Only reached when every statement succeeded.
        connection.commit()
    finally:
        connection.close()
def fillSubsystemTable(projectName, repo_owner, listOfNamesAndEmail):
    """Register every subsystem of a project and load all of its data.

    Subsystems and their folders are read from ``folder-component.csv``.  Each
    subsystem is inserted into ``subsystem`` together with the JSON list of its
    folders; then its commits and issues are fetched and handed to the other
    ``fill*`` loaders in a fixed order.

    A subsystem that appears on several CSV rows is processed once; previously
    the whole pipeline ran once per row.

    Args:
        projectName: Project ID; also the value matched against the CSV
            ``project`` column.
        repo_owner: Repository owner forwarded to the issue loaders.
        listOfNamesAndEmail: List passed on to the loaders that collect
            developer names and e-mails.
    """
    folder_component = dict()
    totalSubsystem = []

    with open('folder-component.csv') as csvfile:
        for row in csv.DictReader(csvfile):
            if row['project'] == projectName:
                print(row['subsystem'], row['folder'])
                folder_component[row['folder']] = row['subsystem']
                totalSubsystem.append(row['subsystem'])

    insert_sql = (
        "INSERT IGNORE INTO subsystem (SubsystemID,ProjectID,Name,SubsystemURLs) "
        "VALUES (%s,%s,%s,%s) ON DUPLICATE KEY UPDATE SubsystemID=SubsystemID"
    )

    # dict.fromkeys removes repeated subsystems but keeps CSV order.
    for subsystem in dict.fromkeys(totalSubsystem):
        print("subsystem " + subsystem)
        subSystemURLs = [
            folder for folder, owner in folder_component.items() if owner == subsystem
        ]
        my_json_string = json.dumps(subSystemURLs)
        # The first folder listed for the subsystem is used as its repository.
        repo_url = subSystemURLs[0] + ".git"

        # NOTE(review): the connection is now really closed (the original
        # wrote ``connection.close`` without calling it). This assumes
        # getDBConn() hands out a fresh connection per call -- confirm.
        connection = getDBConn()
        try:
            cursor = connection.cursor()
            try:
                cursor.execute(insert_sql, (subsystem, projectName, subsystem, my_json_string))
            finally:
                cursor.close()
            connection.commit()
        finally:
            connection.close()

        repo_dir = "/tmp/" + subsystem + ".git"

        commits = getAllGitCommits(repo_url, repo_dir)
        issuesAndPullRequests = getAllGitIssues_pullRequests(repo_owner, subsystem)
        print("i fetched everything fine")
        fillCommitTable(commits, projectName, listOfNamesAndEmail)
        print("fillCommitTable fine")
        fillArtifactTable(commits, projectName, subsystem)
        print("fillArtifactTable fine")
        commit_rows = fillArtifacts_CommitsTable(commits, subsystem, projectName)
        print(len(commit_rows))
        print("fillArtifacts_CommitsTable fine")
        fillIssueTable_GitIssues_And_PullRequests(issuesAndPullRequests, projectName, repo_owner, subsystem, listOfNamesAndEmail)
        print("fillIssueTable_GitIssues_And_PullRequests fine")
        fillCommits_IssuesTable_Through_Issue(issuesAndPullRequests, repo_owner, projectName, subsystem, commit_rows)
        print("fillCommits_IssuesTable_Through_Issue fine")
        fillCommit_IssuesTable_Through_Commit(commits, projectName, subsystem, commit_rows)
        print("fillCommit_IssuesTable_Through_Commit fine")
def fillArtifactTable(commits, projectName, subsystem):
    """Store every file touched by the given commits in the ``artifact`` table.

    The file path becomes ``ArtifactURL`` and its last ``/``-separated segment
    becomes ``Name``.  Apostrophes and commas are still removed from both so
    that new rows keep the format produced by earlier runs.

    Args:
        commits: Iterable of commit items; ``item['data']['files']`` (dicts
            with a ``'file'`` key) is read.
        projectName: Value stored in the ``ProjectID`` column.
        subsystem: Value stored in the ``SubsystemID`` column.
    """
    insert_sql = (
        "INSERT IGNORE INTO artifact (ArtifactID,ArtifactURL,SubsystemID,Name,ProjectID) "
        "VALUES (default,%s,%s,%s,%s) ON DUPLICATE KEY UPDATE ArtifactID=ArtifactID"
    )
    strip_quotes_and_commas = str.maketrans('', '', "',")

    # NOTE(review): the connection is now really closed (the original wrote
    # ``connection.close`` without calling it). This assumes getDBConn() hands
    # out a fresh connection per call -- confirm.
    connection = getDBConn()
    try:
        cursor = connection.cursor()
        try:
            for commit in commits:
                for changed_file in commit["data"]["files"]:
                    fileURL = changed_file['file'].translate(strip_quotes_and_commas)
                    # Removing "'" and "," never moves a "/", so the name can be
                    # taken from the cleaned path.
                    fileName = fileURL.rsplit("/", 1)[-1]
                    cursor.execute(insert_sql, (fileURL, subsystem, fileName, projectName))
        finally:
            cursor.close()
        # Only reached when every statement succeeded.
        connection.commit()
    finally:
        connection.close()
def fillDeveloperTable(listOfNamesAndEmail, projectName):
    """De-duplicate the collected developers and store them in ``developer``.

    Exact duplicate entries are dropped, the de-duplication helpers
    (``transformation``, ``deDuplication``, ``generateLinkedDevelopers``) are
    run, and the resulting ``developer_LinkedDevelopers.csv`` is read back.  A
    developer whose e-mail is listed as a linked e-mail is stored under the
    identity it is linked to; every other developer is stored as is.

    Args:
        listOfNamesAndEmail: List of ``{'name': ..., 'email': ...}`` dicts.
            The caller's list is not modified.
        projectName: Project ID; also the value matched against the CSV
            ``Project`` column.

    Returns:
        The CSV rows for ``projectName`` as dicts with the keys
        ``DisplayName``, ``Email``, ``Linked_Developer_DisplayName`` and
        ``Linked_Developer_Email``.
    """
    # Remove exact duplicates (dicts are not hashable, so go through tuples).
    listOfNamesAndEmail = [dict(t) for t in {tuple(d.items()) for d in listOfNamesAndEmail}]
    # Transform the developer data into the de-duplication input.
    transformation(listOfNamesAndEmail)
    duplicatePairs = deDuplication()
    # Generate the developers / linked developers CSV read back below.
    generateLinkedDevelopers(duplicatePairs, listOfNamesAndEmail, projectName)

    linked_fields = (
        'DisplayName',
        'Email',
        'Linked_Developer_DisplayName',
        'Linked_Developer_Email',
    )
    with open('developer_LinkedDevelopers.csv', encoding="utf-8") as f:
        rows = [
            {field: row[field] for field in linked_fields}
            for row in csv.DictReader(f)
            if row['Project'] == projectName
        ]

    # Group the CSV rows by linked e-mail so each developer is looked up once.
    rows_by_linked_email = {}
    for item in rows:
        rows_by_linked_email.setdefault(item['Linked_Developer_Email'], []).append(item)

    insert_sql = (
        "INSERT IGNORE INTO developer (UserID,Email,DisplayName,ProjectID) "
        "VALUES (%s,%s,%s,%s) ON DUPLICATE KEY UPDATE UserID=UserID"
    )

    # NOTE(review): the connection is now really closed (the original wrote
    # ``connection.close`` without calling it). This assumes getDBConn() hands
    # out a fresh connection per call -- confirm.
    connection = getDBConn()
    try:
        cursor = connection.cursor()
        try:
            for developer in listOfNamesAndEmail:
                linked_rows = rows_by_linked_email.get(developer['email'])
                if linked_rows:
                    for item in linked_rows:
                        cursor.execute(
                            insert_sql,
                            (item['Email'], item['Email'], item['DisplayName'], projectName),
                        )
                else:
                    # Apostrophes are still replaced by spaces so stored names
                    # keep the format produced by earlier runs.
                    display_name = developer['name'].replace("'", ' ')
                    cursor.execute(
                        insert_sql,
                        (developer['email'], developer['email'], display_name, projectName),
                    )
        finally:
            cursor.close()
        # Only reached when every statement succeeded.
        connection.commit()
    finally:
        connection.close()
    return rows
def fillCommits_IssuesTable_Through_Issue(issuesAndPullRequests, repo_owner, projectName, subsystem, commit_rows):
    """Link commits to the pull requests that list them.

    Every item yielded by ``issuesAndPullRequests.fetch(category='pull_request')``
    is walked.  For each entry of its ``commits_data`` that equals index 1 of a
    commit row, a ``Commits_Issues`` row and a type ``'10'`` ``involvement`` row
    are written, keyed by ``<repo_owner>/<subsystem>#<number>``.

    Args:
        issuesAndPullRequests: Object whose ``fetch(category=...)`` yields
            items with ``item['data']['number']`` and
            ``item['data']['commits_data']``.
        repo_owner: Repository owner, first part of the issue ID.
        projectName: Value stored in the ``ProjectID`` column.
        subsystem: Subsystem name, second part of the issue ID.
        commit_rows: Rows of the ``commit`` table.  Index 0 is written as the
            commit ID, index 1 is compared with the linked commits and index 2
            is written as the involvement time.
    """
    link_sql = (
        "INSERT IGNORE INTO Commits_Issues (CommitID,IssueID) VALUES (%s,%s) "
        "ON DUPLICATE KEY UPDATE CommitID = CommitID, IssueID = IssueID"
    )
    involvement_sql = (
        "INSERT INTO involvement (ID, ProjectID,Type,SourceURL,CommitID,IssueID,Time) "
        "VALUES (default,%s,'10',%s,%s,%s,%s)"
    )
    issue_prefix = repo_owner + '/' + subsystem + '#'

    # NOTE(review): the connection is now really closed (the original wrote
    # ``connection.close`` without calling it). This assumes getDBConn() hands
    # out a fresh connection per call -- confirm.
    connection = getDBConn()
    try:
        cursor = connection.cursor()
        try:
            # Pull requests carry the commits attached to them.
            for pull in issuesAndPullRequests.fetch(category='pull_request'):
                data = pull['data']
                print(data['number'])
                issue_ = issue_prefix + str(data['number'])
                for linkedCommit in data['commits_data']:
                    for commit_row in commit_rows:
                        if commit_row[1] != linkedCommit:
                            continue
                        print(linkedCommit)
                        print(commit_row[0])
                        commit_id = str(commit_row[0])
                        cursor.execute(link_sql, (commit_id, issue_))
                        cursor.execute(
                            involvement_sql,
                            (projectName, issue_, commit_id, issue_, str(commit_row[2])),
                        )
        finally:
            cursor.close()
        # Only reached when every statement succeeded.
        connection.commit()
    finally:
        connection.close()
from communityGraphExtraction import convertIssueInvolvementToEdges
from communityGraphExtraction import getArtChangeDatesPerDev
from communityGraphExtraction import getMaxIssueInvolvementsPerUser
from communityGraphExtraction import getSubsystems
from communityGraphExtraction import getCrossSubsystemIssueLinks
from communityGraphExtraction import convertCrossSubsystemIssueLinksToEdges
from communityGraphExtraction import getCrossSubsystemDeveloperInvolvement
from communityGraphExtraction import convertSubsystemDeveloperInvolvementToEdges
from cypher_neo4j import graph2Cypher
from string import Template

import matplotlib.pyplot as plt
import sys

from sqlConnection import getDBConn
# Module-level database connection, opened as a side effect of importing this file.
# NOTE(review): the functions visible in this part of the file open their own
# local connections; confirm whether anything still relies on this global.
connection = getDBConn()

def generateDictForProjectWindow(projectId):
    """Assemble the developer graph dictionary for one project.

    The result holds three entries: developer edges derived from overlapping
    artifact changes (using a four-month window expressed in seconds),
    developer edges derived from issue involvement, and the project's
    developers.
    """
    four_months_in_seconds = 4 * 30 * 24 * 3600

    artifact_changes = getArtChangeDatesPerDev(projectId)
    commit_edges = comparePairwiseDevArtifactChangeOverlap(
        artifact_changes, four_months_in_seconds
    )

    issue_involvements = getMaxIssueInvolvementsPerUser(projectId)
    issue_edges = convertIssueInvolvementToEdges(issue_involvements)

    return {
        'developerEdgesViaCommits': commit_edges,
        'developerEdgesViaIssues': issue_edges,
        'developers': getDevelopersPerProject(projectId),
    }

def generateDictForSubystems(projectID):
    """Start building the subsystem graph dictionary for ``projectID``.

    NOTE(review): as visible here the function stops right after fetching the
    subsystems: ``graph``, ``subsys`` and ``subsysParent`` are never used and
    nothing is returned, so callers receive ``None``.  The body looks
    unfinished or truncated -- confirm against the intended implementation.
    """
    graph = dict()
    graph['projectID'] = projectID
    # The result of getSubsystems is unpacked as a pair.
    (subsys, subsysParent) = getSubsystems(projectID)
def _issue_participant(user_data):
    """Return the ``{'name', 'email'}`` record for one GitHub user payload."""
    login = user_data['login']
    # Fall back to the login when the user has no display name.
    return {'name': user_data['name'] or login, 'email': login}


def fillIssueTable_GitIssues_And_PullRequests(issuesAndPullRequests, projectName, repo_owner, subsystem, listOfNamesAndEmail):
    """Store every issue and pull request of a subsystem and collect its users.

    Each item yielded by ``issuesAndPullRequests.fetch()`` is inserted into
    ``issue`` (pull requests fill the ``GitPullRequest*`` columns, plain issues
    the ``GitIssue*`` columns), linked to ``subsystem`` in
    ``subsystems_Issues``, and its JSON serialisation is written to
    ``GitJSONContent``.  Issues are keyed by
    ``<repo_owner>/<subsystem>#<number>``.

    Args:
        issuesAndPullRequests: Object whose ``fetch()`` yields items with a
            ``'data'`` dict (``number``, ``created_at``, ``closed_at``,
            ``url``, ``user_data``, ``comments_data``; pull requests also
            carry a ``pull_request`` key).
        projectName: Value stored in the ``ProjectID`` column.
        repo_owner: Repository owner, first part of the issue ID.
        subsystem: Subsystem ID, second part of the issue ID.
        listOfNamesAndEmail: List that is extended in place with one
            ``{'name': ..., 'email': ...}`` dict for the reporter and one per
            comment (the GitHub login is used as the e-mail).  The reporter is
            now appended once per item; it used to be appended a second time
            whenever the item had comments.
    """
    # The date format is bound as a parameter so that its "%" characters never
    # take part in the driver's placeholder substitution.
    date_format = '%Y-%m-%dT%H:%i:%s'
    issue_prefix = repo_owner + '/' + subsystem + '#'

    values_clause = (
        "VALUES (%s,STR_TO_DATE(%s,%s),STR_TO_DATE(%s,%s),%s,%s,%s) "
        "ON DUPLICATE KEY UPDATE IssueID=IssueID "
    )
    pull_request_sql = (
        "INSERT IGNORE INTO issue "
        "(IssueID,CreationDate,CloseDate,GitPullRequestURL,GitPullRequestName, ProjectID) "
        + values_clause
    )
    plain_issue_sql = (
        "INSERT IGNORE INTO issue "
        "(IssueID,CreationDate,CloseDate,GitIssueURL,GitIssueName, ProjectID) "
        + values_clause
    )
    subsystem_link_sql = (
        "INSERT IGNORE INTO subsystems_Issues (SubsystemID,IssueID) VALUES (%s,%s) "
        "ON DUPLICATE KEY UPDATE SubsystemID=SubsystemID, IssueID=IssueID"
    )
    json_sql = "UPDATE issue SET GitJSONContent = %s WHERE IssueID = %s"

    # NOTE(review): the connection is now really closed (the original wrote
    # ``connection.close`` without calling it). This assumes getDBConn() hands
    # out a fresh connection per call -- confirm.
    connection = getDBConn()
    try:
        cursor = connection.cursor()
        try:
            for item in issuesAndPullRequests.fetch():
                data = item['data']
                print(data['number'])
                issue_id = issue_prefix + str(data['number'])

                # Drop the trailing "Z"; a value without one is kept whole.
                creation_date = data['created_at'].partition("Z")[0]
                # Open items have no close date: bind NULL instead of handing
                # an empty string to STR_TO_DATE.
                close_date = None
                if data['closed_at'] is not None:
                    close_date = data['closed_at'].partition("Z")[0]

                insert_sql = pull_request_sql if 'pull_request' in data else plain_issue_sql
                cursor.execute(
                    insert_sql,
                    (
                        issue_id,
                        creation_date, date_format,
                        close_date, date_format,
                        data['url'],
                        str(data['number']),
                        projectName,
                    ),
                )
                cursor.execute(subsystem_link_sql, (subsystem, issue_id))

                # Collect the reporter, then everybody who commented.
                listOfNamesAndEmail.append(_issue_participant(data['user_data']))
                for issue_comment in data['comments_data']:
                    listOfNamesAndEmail.append(_issue_participant(issue_comment['user_data']))

                cursor.execute(json_sql, (json.dumps(item), issue_id))
        finally:
            cursor.close()
        # Only reached when every statement succeeded.
        connection.commit()
    finally:
        connection.close()