def GetTopClosers(days, startdate, enddate, identities_db, filter, closed_condition, limit): affiliations = "" for aff in filter: affiliations += " com.name<>'" + aff + "' and " date_limit = "" if (days != 0): sql = "SELECT @maxdate:=max(changed_on) from changes limit 1" ExecuteQuery(sql) date_limit = " AND DATEDIFF(@maxdate, changed_on)<" + str(days) q = "SELECT up.id as id, up.identifier as closers, "+\ " count(distinct(c.id)) as closed "+\ "FROM "+GetTablesCompaniesITS(identities_db)+ ", "+\ " "+identities_db+".companies com, "+\ " "+identities_db+".upeople up "+\ "WHERE "+GetFiltersCompaniesITS() +" and "+\ " "+affiliations+ " "+\ " upc.company_id = com.id and "+\ " c.changed_by = pup.people_id and "+\ " pup.upeople_id = up.id and "+\ " c.changed_on >= "+ startdate+ " and "+\ " c.changed_on < "+ enddate+ " and " +\ " "+closed_condition+ " " + date_limit+ " "+\ "GROUP BY up.identifier "+\ "ORDER BY closed desc, closers "+\ "LIMIT "+ limit data = ExecuteQuery(q) return (data)
def GetTopAuthorsMediaWiki(days, startdate, enddate, identities_db, bots, limit): date_limit = "" filter_bots = '' for bot in bots: filter_bots += " user<>'" + bot + "' and " if (days != 0): ExecuteQuery("SELECT @maxdate:=max(date) from wiki_pages_revs limit 1") date_limit = " AND DATEDIFF(@maxdate, date)<" + str(days) q = "SELECT up.id as id, up.identifier as authors, "+\ " count(wiki_pages_revs.id) as reviews "+\ "FROM wiki_pages_revs, people_upeople pup, "+identities_db+".upeople up "+\ "WHERE "+ filter_bots+ " "+\ " wiki_pages_revs.user = pup.people_id and "+\ " pup.upeople_id = up.id and "+\ " date >= "+ startdate+ " and "+\ " date < "+ enddate+ " "+ date_limit+ " "+\ " GROUP BY authors "+\ " ORDER BY reviews desc, authors "+\ " LIMIT "+ limit data = ExecuteQuery(q) return (data)
def top_senders(days, startdate, enddate, identities_db, filter, limit): affiliations = "" if (not filter): filter = [] for aff in filter: affiliations = affiliations + " c.name<>'" + aff + "' and " date_limit = "" if (days != 0): sql = "SELECT @maxdate:=max(first_date) from messages limit 1" ExecuteQuery(sql) date_limit = " AND DATEDIFF(@maxdate,first_date)<" + str(days) q = "SELECT up.id as id, up.identifier as senders, "+\ "COUNT(distinct(m.message_id)) as sent "+\ "FROM "+ GetTablesCompanies(identities_db)+\ " ,"+identities_db+".upeople up "+\ "WHERE "+ GetFiltersCompanies()+ " AND "+\ " pup.upeople_id = up.id AND "+\ " "+ affiliations + " "+\ " m.first_date >= "+startdate+" AND "+\ " m.first_date < "+enddate +\ date_limit+ " "+\ "GROUP BY up.identifier "+\ "ORDER BY sent desc "+\ "LIMIT " + limit data = ExecuteQuery(q) return (data)
def get_project_subproject(project_info): ## returns a dict with {"project":["subproject_a",..]} dict_res = {} ## projects of level 0 q = "SELECT project_id as id FROM projects WHERE "+\ "project_id NOT IN (SELECT subproject_id from project_children);" res = ExecuteQuery(q) p_no_children = res["id"] for p in p_no_children: key = project_info[p]["string_id"] title = project_info[p]["title"] dict_res[key] = {"parent_project": "root", "title": title} q = "SELECT * FROM project_children" relationships = ExecuteQuery(q) cont = 0 length = len(relationships['project_id']) while cont < length: parent_num_id = relationships['project_id'][cont] child_num_id = relationships['subproject_id'][cont] parent_str_id = project_info[parent_num_id]["string_id"] child_str_id = project_info[child_num_id]["string_id"] child_title = project_info[child_num_id]["title"] dict_res[child_str_id] = { "parent_project": parent_str_id, "title": child_title } cont += 1 #print project_info return dict_res
def GetRepoEvolSentSendersIRC (repo, period, startdate, enddate): fields = 'COUNT(irclog.id) AS sent, COUNT(DISTINCT(pup.upeople_id)) AS senders' tables= GetTablesReposIRC() filters = GetFiltersReposIRC() + " AND c.name='"+repo+"'" filters += " AND irclog.type='COMMENT'" q = GetSQLPeriod(period,'date', fields, tables, filters, startdate, enddate) return(ExecuteQuery(q))
def GetSendersIRC (period, startdate, enddate, identities_db, type_analysis, evolutionary): fields = " count(distinct(nick)) as senders " tables = " irclog " + GetSQLReportFrom(identities_db, type_analysis) filters = GetSQLReportWhere(type_analysis, "author") filters += " and type='COMMENT' " q = BuildQuery(period, startdate, enddate, " date ", fields, tables, filters, evolutionary) return(ExecuteQuery(q))
def StaticNumRepositoriesIRC (period, startdate, enddate, identities_db=None, type_analysis=[]): fields = "SELECT COUNT(DISTINCT(channel_id)) AS repositories " tables = "FROM irclog " filters = "WHERE date >=" + startdate + " AND date < " + enddate filters += " AND type='COMMENT' " q = fields + tables + filters return(ExecuteQuery(q))
def StaticNumSendersIRC (period, startdate, enddate, identities_db=None, type_analysis=[]): fields = "SELECT count(distinct(nick)) as senders" tables = " FROM irclog " filters = "WHERE date >=" + startdate + " and date < " + enddate filters += " AND type='COMMENT' " q = fields + tables + filters return(ExecuteQuery(q))
def StaticNumSent(startdate, enddate): fields = " COUNT(*) as sent " tables = GetTablesOwnUniqueIdsMLS() filters = GetFiltersOwnUniqueIdsMLS() q = GetSQLGlobal('first_date', fields, tables, filters, startdate, enddate) sent = ExecuteQuery(q) return (sent)
def GetEvolChanges(period, startdate, enddate, value): fields = "count(issue_id) as "+ value+ "_changes" tables = "changes" filters = "new_value='"+value+"'" q = GetSQLPeriod(period, " changed_on", fields, tables, filters, startdate, enddate) return(ExecuteQuery(q))
def StaticNumSenders(startdate, enddate): fields = " COUNT(DISTINCT(pup.upeople_id)) as senders " tables = GetTablesOwnUniqueIdsMLS() filters = GetFiltersOwnUniqueIdsMLS() q = GetSQLGlobal('first_date', fields, tables, filters, startdate, enddate) senders = ExecuteQuery(q) return (senders)
def StaticNumAuthorsMediaWiki(period, startdate, enddate, identities_db, type_analysis): select = "SELECT count(distinct(user)) as authors" tables = " FROM wiki_pages_revs " where = " where date >=" + startdate + " and date < " + enddate q = select + tables + where return (ExecuteQuery(q))
def GetWaiting4Submitter (period, startdate, enddate, identities_db, type_analysis, evolutionary): fields = "count(distinct(c.id)) as WaitingForSubmitter " tables = " changes c, "+\ " issues i, "+\ " (select c.issue_id as issue_id, "+\ " c.old_value as old_value, "+\ " max(c.id) as id "+\ " from changes c, "+\ " issues i "+\ " where c.issue_id = i.id and "+\ " i.status='NEW' "+\ " group by c.issue_id, c.old_value) t1 " tables = tables + GetSQLReportFromSCR(identities_db, type_analysis) filters = " i.id = c.issue_id "+\ " and t1.id = c.id "+\ " and (c.field='CRVW' or c.field='Code-Review' or c.field='Verified' or c.field='VRIF') "+\ " and (c.new_value=-1 or c.new_value=-2) " filters = filters + GetSQLReportWhereSCR(type_analysis) if (evolutionary): q = GetSQLPeriod(period, " c.changed_on", fields, tables, filters, startdate, enddate) else: q = GetSQLGlobal(" c.changed_on ", fields, tables, filters, startdate, enddate) return(ExecuteQuery(q))
def GetTopSubmittersQuerySCR (days, startdate, enddate, identities_db, bots, limit, merged = False): date_limit = "" merged_sql = "" rol = "openers" action = "opened" filter_bots = '' for bot in bots: filter_bots = filter_bots+ " up.identifier<>'"+bot+"' and " if (days != 0 ): q = "SELECT @maxdate:=max(submitted_on) from issues limit 1" ExecuteQuery(q) date_limit = " AND DATEDIFF(@maxdate, submitted_on)<"+str(days) if (merged): merged_sql = " AND status='MERGED' " rol = "mergers" action = "merged" q = "SELECT up.id as id, up.identifier as "+rol+", "+\ " count(distinct(i.id)) as "+action+" "+\ " FROM people_upeople pup, issues i, "+identities_db+".upeople up "+\ " WHERE "+ filter_bots+ " "+\ " i.submitted_by = pup.people_id and "+\ " pup.upeople_id = up.id and "+\ " i.submitted_on >= "+ startdate+ " and "+\ " i.submitted_on < "+ enddate+ " "+\ " "+date_limit+ merged_sql+ " "+\ " GROUP BY up.identifier "+\ " ORDER BY "+action+" desc, id "+\ " LIMIT "+ limit return(q)
def _init_threads(self): # Returns dictionary of message_id threads. Each key contains a list # of emails associated to that thread (not ordered). # Retrieving all of the messages. query = """ select message_ID, is_response_of from messages where first_date > '%s' and first_date <= '%s' """ % (self.initdate, self.enddate) list_messages = ExecuteQuery(query) self.list_message_id = list_messages["message_ID"] self.list_is_response_of = list_messages["is_response_of"] messages = {} for message_id in self.list_message_id: # Looking for messages in the thread index = self.list_message_id.index(message_id) # Only analyzing those whose is_response_of is None, # those are the message 'root' of each thread. if self.list_is_response_of[index] is None: messages[message_id] = self._build_threads(message_id) # Adding the root message to the list in first place messages[message_id].insert(0, message_id) self.threads = messages
def GetCompaniesNameITS(startdate, enddate, identities_db, closed_condition, filter): affiliations = "" for aff in filter: affiliations += " c.name<>'" + aff + "' and " # list each of the companies analyzed # those are order by number of closed issues q = "select c.name "+\ "from issues i, "+\ " changes ch, "+\ " people_upeople pup, "+\ " "+ identities_db+ ".upeople_companies upc, "+\ " "+ identities_db+ ".companies c "+\ "where i.id = ch.issue_id and "+\ " ch.changed_by = pup.people_id and "+\ " pup.upeople_id = upc.upeople_id and "+\ " upc.company_id = c.id and "+\ " ch.changed_on >= "+ startdate+ " and "+\ " ch.changed_on < "+ enddate+" and "+\ " "+ affiliations +\ closed_condition +\ " group by c.name "+\ " order by count(distinct(i.id)) desc" data = ExecuteQuery(q) return (data)
def GetChangers(period, startdate, enddate, identities_db, type_analysis, evolutionary): #This function returns the evolution or agg number of changed issues #This function can be also reproduced using the Backlog function. #However this function is less time expensive. fields = " count(distinct(pup.upeople_id)) as changers " tables = " issues i, changes ch " + GetITSSQLReportFrom( identities_db, type_analysis) filters = " i.id = ch.issue_id " filters_ext = GetITSSQLReportWhere(type_analysis) if (filters_ext != ""): filters += " and " + filters_ext #unique identities filters if (type_analysis is None or len(type_analysis) != 2): #Specific case for the basic option where people_upeople table is needed #and not taken into account in the initial part of the query tables += ", people_upeople pup" filters += " and i.submitted_by = pup.people_id" elif (type_analysis[0] == "repository"): #Adding people_upeople table tables += ", people_upeople pup" filters += " and i.submitted_by = pup.people_id " #Action needed to replace issues filters by changes one filters = filters.replace("i.submitted", "ch.changed") q = BuildQuery(period, startdate, enddate, " ch.changed_on ", fields, tables, filters, evolutionary) data = ExecuteQuery(q) return (data)
def verboseThread(self): # TODO: at some point these numbers should be calculated when # retrieving the initial list of message_id, is_response_of values # Returns the most verbose thread (the biggest emails) if self.verbose == None: # variable was not initialize self.verbose = "" current_len = 0 # iterating through the root messages for message_id in self.threads.keys(): total_len_bodies = 0 # len of all of the body messages # iterating through each of the messages of the thread for msg in self.threads[message_id]: query = """ select length(message_body) as length from messages where message_ID = '%s' """ % (msg) result = ExecuteQuery(query) length = int(result["length"]) total_len_bodies = total_len_bodies + length if total_len_bodies > current_len: # New bigger thread found self.verbose = message_id current_len = total_len_bodies return Email(self.verbose, self.i_db)
def GetListPeopleIRC (startdate, enddate) : fields = "DISTINCT(pup.upeople_id) as id, count(irclog.id) total" tables = GetTablesOwnUniqueIdsIRC() filters = GetFiltersOwnUniqueIdsIRC() filters += " AND irclog.type='COMMENT' " filters += " GROUP BY nick ORDER BY total desc" q = GetSQLGlobal('date',fields,tables, filters, startdate, enddate) return(ExecuteQuery(q))
def GetPeopleStaticITS(developer_id, startdate, enddate, closed_condition): ## FIXME is this function used only to calculate closed issues? if not it must be ## fixed q = GetPeopleQueryITS(developer_id, None, startdate, enddate, False, closed_condition) data = ExecuteQuery(q) return (data)
def GetLastActivityITS(days, closed_condition): # opened issues days = str(days) q = "select count(*) as opened_"+days+" "+\ "from issues "+\ "where submitted_on >= ( "+\ " select (max(submitted_on) - INTERVAL "+days+" day) "+\ " from issues)" data1 = ExecuteQuery(q) # closed issues q = "select count(distinct(issue_id)) as closed_"+days+" "+\ "from changes "+\ "where "+closed_condition+" "+\ "and changed_on >= ( "+\ " select (max(changed_on) - INTERVAL "+days+" day) "+\ " from changes)" data2 = ExecuteQuery(q) # closers q = "SELECT count(distinct(pup.upeople_id)) as closers_"+days+" "+\ "FROM changes, people_upeople pup "+\ "WHERE pup.people_id = changes.changed_by and "+\ "changed_on >= ( "+\ " select (max(changed_on) - INTERVAL "+days+" day) "+\ " from changes) AND "+ closed_condition data3 = ExecuteQuery(q) # people_involved q = "SELECT count(distinct(pup.upeople_id)) as changers_"+days+" "+\ "FROM changes, people_upeople pup "+\ "WHERE pup.people_id = changes.changed_by and "+\ "changed_on >= ( "+\ " select (max(changed_on) - INTERVAL "+days+" day) "+\ " from changes)" data4 = ExecuteQuery(q) agg_data = dict(data1.items() + data2.items()) agg_data = dict(agg_data.items() + data3.items()) return (agg_data)
def GetAuthorsMediaWiki(period, startdate, enddate, identities_db, type_analysis, evolutionary): fields = " count(distinct(user)) as authors " tables = " wiki_pages_revs " + GetSQLReportFrom(identities_db, type_analysis) filters = GetSQLReportWhere(type_analysis, "author") q = BuildQuery(period, startdate, enddate, " date ", fields, tables, filters, evolutionary) return (ExecuteQuery(q))
def GetListPeopleMediaWiki(startdate, enddate): fields = "DISTINCT(pup.upeople_id) as id, count(wiki_pages_revs.id) total" tables = GetTablesOwnUniqueIdsMediaWiki() filters = GetFiltersOwnUniqueIdsMediaWiki() filters += " GROUP BY user ORDER BY total desc" q = GetSQLGlobal('date', fields, tables, filters, startdate, enddate) data = ExecuteQuery(q) return (data)
def StaticNumReviewsMediaWiki(period, startdate, enddate, identities_db, type_analysis): select = "SELECT count(rev_id) as reviews, "+\ "DATE_FORMAT (min(date), '%Y-%m-%d') as first_date, "+\ "DATE_FORMAT (max(date), '%Y-%m-%d') as last_date " tables = " FROM wiki_pages_revs " where = " where date >=" + startdate + " and date < " + enddate q = select + tables + where return (ExecuteQuery(q))
def GetListPeopleMLS(startdate, enddate): fields = "DISTINCT(pup.upeople_id) as id, count(m.message_ID) total" tables = GetTablesOwnUniqueIdsMLS() filters = GetFiltersOwnUniqueIdsMLS() filters += " GROUP BY id ORDER BY total desc" q = GetSQLGlobal('first_date', fields, tables, filters, startdate, enddate) data = ExecuteQuery(q) return (data)
def GetIPs(period, startdate, enddate, evolutionary): # Generic function to obtain number of IPs fields = "count(distinct(ip)) as ips" tables = "downloads" filters = "" query = BuildQuery(period, startdate, enddate, " date ", fields, tables, filters, evolutionary) return (ExecuteQuery(query))
def StaticNumSentIRC (period, startdate, enddate, identities_db=None, type_analysis=[]): fields = "SELECT count(message) as sent, \ DATE_FORMAT (min(date), '%Y-%m-%d') as first_date, \ DATE_FORMAT (max(date), '%Y-%m-%d') as last_date " tables = " FROM irclog " filters = "WHERE date >=" + startdate + " and date < " + enddate filters += " AND type='COMMENT' " q = fields + tables + filters return(ExecuteQuery(q))
def GetPeopleListITS(startdate, enddate): fields = "DISTINCT(pup.upeople_id) as pid, count(c.id) as total" tables = GetTablesOwnUniqueIdsITS() filters = GetFiltersOwnUniqueIdsITS() filters += " GROUP BY pid ORDER BY total desc" q = GetSQLGlobal('changed_on', fields, tables, filters, startdate, enddate) data = ExecuteQuery(q) return (data)
def GetActiveSubmittersITS(days, enddate): # FIXME parameters should be: startdate and enddate q0 = "SELECT DISTINCT(pup.upeople_id) AS active_submitters"+\ " FROM issues i, people_upeople pup"+\ " WHERE pup.people_id = i.submitted_by AND"+\ " submitted_on >= ( %s - INTERVAL %s day)" q1 = q0 % (enddate, days) data = ExecuteQuery(q1) return (data)
def GetActiveChangersITS(days, enddate): # FIXME parameters should be: startdate and enddate q0 = "SELECT distinct(pup.upeople_id) as active_changers"+\ " FROM changes, people_upeople pup "+\ " WHERE pup.people_id = changes.changed_by and "+\ " changed_on >= ( %s - INTERVAL %s day)" q1 = q0 % (enddate, days) data = ExecuteQuery(q1) return (data)
def lastActivityMediaWiki (init_date, days) : #commits days = str(days) q = "select count(wiki_pages_revs.id) as reviews_"+days+" "+\ "from wiki_pages_revs "+\ "where date >= ("+ init_date+ " - INTERVAL "+days+" day)" data1 = ExecuteQuery(q) q = "select count(distinct(pup.upeople_id)) as authors_"+days+" "+\ "from wiki_pages_revs, people_upeople pup "+\ "where pup.people_id = user and "+\ " date >= ("+ init_date+ " - INTERVAL "+days+" day)" data2 = ExecuteQuery(q) agg_data = dict(data1.items() + data2.items()) return(agg_data)
def lastActivity (days) : days = str(days) #commits q = "select count(distinct(message_ID)) as sent_"+days+" "+\ " from messages "+\ " where first_date >= ( "+\ " select (max(first_date) - INTERVAL "+days+" day) "+\ " from messages)" data1 = ExecuteQuery(q) q = "select count(distinct(pup.upeople_id)) as senders_"+days+" "+\ " from messages m, "+\ " people_upeople pup, "+\ " messages_people mp "+\ " where pup.people_id = mp.email_address and "+\ " m.message_ID = mp.message_id and "+\ " m.first_date >= (select (max(first_date) - INTERVAL "+days+" day) "+\ " from messages)" data2 = ExecuteQuery(q) agg_data = dict(data1.items() + data2.items()) return(agg_data)
def GetLastActivityITS (days, closed_condition): # opened issues days = str(days) q = "select count(*) as opened_"+days+" "+\ "from issues "+\ "where submitted_on >= ( "+\ " select (max(submitted_on) - INTERVAL "+days+" day) "+\ " from issues)" data1 = ExecuteQuery(q) # closed issues q = "select count(distinct(issue_id)) as closed_"+days+" "+\ "from changes "+\ "where "+closed_condition+" "+\ "and changed_on >= ( "+\ " select (max(changed_on) - INTERVAL "+days+" day) "+\ " from changes)" data2 = ExecuteQuery(q) # closers q = "SELECT count(distinct(pup.upeople_id)) as closers_"+days+" "+\ "FROM changes, people_upeople pup "+\ "WHERE pup.people_id = changes.changed_by and "+\ "changed_on >= ( "+\ " select (max(changed_on) - INTERVAL "+days+" day) "+\ " from changes) AND "+ closed_condition data3 = ExecuteQuery(q) # people_involved q = "SELECT count(distinct(pup.upeople_id)) as changers_"+days+" "+\ "FROM changes, people_upeople pup "+\ "WHERE pup.people_id = changes.changed_by and "+\ "changed_on >= ( "+\ " select (max(changed_on) - INTERVAL "+days+" day) "+\ " from changes)" data4 = ExecuteQuery(q) agg_data = dict(data1.items()+data2.items()) agg_data = dict(agg_data.items()+data3.items()) return (agg_data)