def get_top_data(startdate, enddate, identities_db, filter_, npeople):
    """Return top senders/participants for QAForums.

    Tops are computed for all time, the last month and the last year.
    Filters are not supported yet: an empty dict is returned and an
    informational message is logged in that case.
    """
    top = {}
    # Fetch the metric objects up front, in a fixed order.
    metrics = [(name, DataSource.get_metrics(name, QAForums))
               for name in ("csenders", "asenders", "qsenders",
                            "participants")]

    type_analysis = None
    if filter_ is not None:
        type_analysis = filter_.get_type_analysis()
    mfilter = MetricFilters(None, startdate, enddate, type_analysis, npeople)

    if filter_ is None:
        for name, metric in metrics:
            top[name + '.'] = metric.get_list(mfilter, 0)
            top[name + '.last month'] = metric.get_list(mfilter, 31)
            top[name + '.last year'] = metric.get_list(mfilter, 365)
    else:
        logging.info("QAForums does not support yet top for filters.")
    return top
def get_top_data(startdate, enddate, identities_db, filter_=None, npeople=None):
    """Return top IPs and packages for DownloadsDS (global view only).

    IP addresses are anonymized before being returned. Filters are not
    supported yet: an empty dict is returned in that case.
    """
    def filter_ips(ips):
        """Anonymize IPs: keep only the last two IPv4 octets, or only the
        first IPv6 group."""
        anonymized = {'downloads': ips['downloads'], 'ips': []}
        for ip in ips['ips']:
            masked = ip
            # IPv4: mask the first two octets
            octets = ip.split(".")
            if len(octets) == 4:
                masked = "x.x." + octets[2] + "." + octets[3]
            # IPv6: keep only the leading group
            groups = ip.split(":")
            if len(groups) > 1:
                masked = groups[0] + ":X"
            anonymized['ips'].append(masked)
        return anonymized

    top = {}
    mips = DataSource.get_metrics("ips", DownloadsDS)
    mpackages = DataSource.get_metrics("packages", DownloadsDS)

    type_analysis = None
    if filter_ is not None:
        type_analysis = filter_.get_type_analysis()
    mfilter = MetricFilters(None, startdate, enddate, type_analysis, npeople)

    if filter_ is None:
        top['ips.'] = filter_ips(mips.get_list(mfilter, 0))
        top['packages.'] = mpackages.get_list(mfilter, 0)
    else:
        logging.info("DownloadsDS does not support yet top for filters.")
    return top
def get_events():
    """Return newcomer and gone-author events for SCM.

    Returns a dict with 'newcomers' and 'goneauthors' entries, each the
    result of the corresponding metric's get_list(). Entries whose metric
    is unavailable are simply omitted.

    BUG FIX: the original only returned early when BOTH metrics were None;
    if exactly one was None, calling get_list() on it raised
    AttributeError. Each metric is now guarded independently.
    """
    events = {}
    newcomers = DataSource.get_metrics("newauthors", SCM)
    goneauthors = DataSource.get_metrics("goneauthors", SCM)
    if newcomers is not None:
        events['newcomers'] = newcomers.get_list()
    if goneauthors is not None:
        events['goneauthors'] = goneauthors.get_list()
    return events
def get_events():
    """Return newcomer and gone-author events for SCM.

    Returns a dict with 'newcomers' and 'goneauthors' entries, each the
    result of the corresponding metric's get_list(). Entries whose metric
    is unavailable are simply omitted.

    BUG FIX: the original only returned early when BOTH metrics were None;
    if exactly one was None, calling get_list() on it raised
    AttributeError. Each metric is now guarded independently.
    """
    events = {}
    newcomers = DataSource.get_metrics("newauthors", SCM)
    goneauthors = DataSource.get_metrics("goneauthors", SCM)
    if newcomers is not None:
        events['newcomers'] = newcomers.get_list()
    if goneauthors is not None:
        events['goneauthors'] = goneauthors.get_list()
    return events
def get_top_people(startdate, enddate, idb):
    """Top people for all data sources.

    Builds per-data-source top lists, then keeps only the people who
    appear in at least `min_data_sources` of them.
    """
    import vizgrimoire.GrimoireSQL
    from vizgrimoire.SCR import SCR
    from vizgrimoire.MLS import MLS
    from vizgrimoire.ITS import ITS
    from vizgrimoire.IRC import IRC
    from vizgrimoire.Mediawiki import Mediawiki
    from vizgrimoire.metrics.metrics_filter import MetricFilters
    from vizgrimoire.data_source import DataSource

    npeople = "10000"       # max limit, all people included
    min_data_sources = 3    # min data sources to be in the list
    tops = {}
    all_top = {}
    all_top_min_ds = {}
    mfilter = MetricFilters(None, startdate, enddate, None, npeople)

    # (tops key, metric name, data source class, field renamed "identifier").
    # SCR and SCM are the same. Don't use both for Tops.
    sources = [
        ("scr", "submitters", SCR, "openers"),
        ("mls", "senders", MLS, "senders"),
        ("its", "openers", ITS, "openers"),
        ("irc", "senders", IRC, "senders"),
        ("mediawiki", "authors", Mediawiki, "reviews"),
    ]
    for ds, metric_name, ds_class, id_field in sources:
        metric = DataSource.get_metrics(metric_name, ds_class)
        if metric:
            tops[ds] = metric.get_list(mfilter, 0)
            tops[ds]["identifier"] = tops[ds].pop(id_field)

    # Build the consolidated top list using all data sources data.
    # Only people present in min_data_sources sources are kept.
    for ds in tops:
        for pos, person_id in enumerate(tops[ds]['id'], 1):
            if person_id not in all_top:
                all_top[person_id] = []
            all_top[person_id].append({
                "ds": ds,
                "pos": pos,
                "identifier": tops[ds]['identifier'][pos - 1],
            })
    for person_id in all_top:
        if len(all_top[person_id]) >= min_data_sources:
            all_top_min_ds[person_id] = all_top[person_id]
    return all_top_min_ds
def get_top_data(startdate, enddate, identities_db, filter_, npeople):
    """Return top contributors for Pullpo (global view only).

    Tops are computed for all time, the last month and the last year.
    Filters are not supported yet: None is returned in that case.
    """
    bots = Pullpo.get_bots()
    top_all = None
    # Fetch all metric objects up front, in a fixed order.
    metrics = [(name, DataSource.get_metrics(name, Pullpo))
               for name in ("reviewers", "submitters", "closers",
                            "mergers", "participants")]

    type_analysis = None
    if filter_ is not None:
        type_analysis = filter_.get_type_analysis()
    mfilter = MetricFilters(None, startdate, enddate, type_analysis, npeople)

    if filter_ is None:
        top_all = {}
        for name, metric in metrics:
            # Historical key quirk: the all-time "reviewers" entry has no
            # trailing dot, unlike every other metric.
            all_time_key = name if name == "reviewers" else name + "."
            top_all[all_time_key] = metric.get_list(mfilter, 0)
            top_all[name + '.last month'] = metric.get_list(mfilter, 31)
            top_all[name + '.last year'] = metric.get_list(mfilter, 365)
    else:
        logging.info("Pullpo does not support yet top for filters.")
    return (top_all)
def get_top_data(startdate, enddate, identities_db, filter_, npeople):
    """Return top contributors for SCR.

    Without a filter, tops (all time / last month / last year) are built
    for reviewers, openers, mergers, participants and active core
    reviewers. With a company/domain/repository filter, only active core
    reviewers are returned. Other filters yield None.
    """
    bots = SCR.get_bots()
    top_all = None
    mreviewers = DataSource.get_metrics("reviewers", SCR)
    mopeners = DataSource.get_metrics("submitters", SCR)
    mmergers = DataSource.get_metrics("closers", SCR)
    mcorereviewers = DataSource.get_metrics("active_core_reviewers", SCR)
    mparticipants = DataSource.get_metrics("participants", SCR)

    type_analysis = None
    if filter_ is not None:
        type_analysis = filter_.get_type_analysis()
    mfilter = MetricFilters(None, startdate, enddate, type_analysis, npeople)

    if filter_ is None:
        top_all = {}
        # (metric, result-key prefix). "reviewers" historically lacks the
        # trailing dot on its all-time key.
        for metric, prefix in ((mreviewers, "reviewers"),
                               (mopeners, "openers"),
                               (mmergers, "mergers"),
                               (mparticipants, "participants"),
                               (mcorereviewers, "active_core_reviewers")):
            all_time_key = prefix if prefix == "reviewers" else prefix + "."
            top_all[all_time_key] = metric.get_list(mfilter, 0)
            top_all[prefix + '.last month'] = metric.get_list(mfilter, 31)
            top_all[prefix + '.last year'] = metric.get_list(mfilter, 365)
    else:
        filter_name = filter_.get_name()
        if filter_name in ["company", "domain", "repository"]:
            # The duplicated check below is a deliberate performance
            # toggle left by the original authors (see inner comment).
            if filter_name in ["company", "domain", "repository"]:
                top_all = {}
                top_all['active_core_reviewers.'] = mcorereviewers.get_list(mfilter, 0)
                top_all['active_core_reviewers.last month'] = mcorereviewers.get_list(mfilter, 31)
                top_all['active_core_reviewers.last year'] = mcorereviewers.get_list(mfilter, 365)
            else:
                # Remove filters above if there are performance issues
                top_all = mcorereviewers.get_list(mfilter)
        else:
            top_all = None
    return (top_all)
def get_top_people(startdate, enddate, idb):
    """Top people for all data sources.

    Builds per-data-source top lists, then keeps only the people who
    appear in at least `min_data_sources` of them.
    """
    import vizgrimoire.GrimoireSQL
    from vizgrimoire.SCR import SCR
    from vizgrimoire.MLS import MLS
    from vizgrimoire.ITS import ITS
    from vizgrimoire.IRC import IRC
    from vizgrimoire.Mediawiki import Mediawiki
    from vizgrimoire.metrics.metrics_filter import MetricFilters
    from vizgrimoire.data_source import DataSource

    npeople = "10000"       # max limit, all people included
    min_data_sources = 3    # min data sources to be in the list
    tops = {}
    all_top = {}
    all_top_min_ds = {}
    mfilter = MetricFilters(None, startdate, enddate, None, npeople)

    # (tops key, metric name, data source class, field renamed "identifier").
    # SCR and SCM are the same. Don't use both for Tops.
    for ds, metric_name, ds_class, id_field in (
            ("scr", "submitters", SCR, "openers"),
            ("mls", "senders", MLS, "senders"),
            ("its", "openers", ITS, "openers"),
            ("irc", "senders", IRC, "senders"),
            ("mediawiki", "authors", Mediawiki, "reviews")):
        metric = DataSource.get_metrics(metric_name, ds_class)
        if metric:
            tops[ds] = metric.get_list(mfilter, 0)
            tops[ds]["identifier"] = tops[ds].pop(id_field)

    # Build the consolidated top list using all data sources data.
    # Only people present in min_data_sources sources are kept.
    for ds in tops:
        for pos, person_id in enumerate(tops[ds]['id'], 1):
            if person_id not in all_top:
                all_top[person_id] = []
            all_top[person_id].append({
                "ds": ds,
                "pos": pos,
                "identifier": tops[ds]['identifier'][pos - 1],
            })
    for person_id in all_top:
        if len(all_top[person_id]) >= min_data_sources:
            all_top_min_ds[person_id] = all_top[person_id]
    return all_top_min_ds
def GetCommitsSummaryCompanies(period, startdate, enddate, identities_db, num_organizations):
    """Commits evolution with one column per top company.

    Returned dataframe structure: unixtime, date, week/month/...,
    company1 ... company[num_organizations], Others. The first three
    fields are used for dating and ordering; companies beyond
    num_organizations are summed into the "Others" column.
    """
    from vizgrimoire.SCM import SCM

    organizations = DataSource.get_metrics("organizations", SCM).get_list()
    organizations = organizations['name']

    summary = {}
    for rank, company in enumerate(organizations, 1):
        type_analysis = ['company', "'" + company + "'"]
        mcommits = DataSource.get_metrics("commits", SCM)
        mfilter = MetricFilters(period, startdate, enddate, type_analysis)
        # Temporarily swap in the per-company filter, then restore.
        saved_filters = mcommits.filters
        mcommits.filters = mfilter
        commits = mcommits.get_ts()
        mcommits.filters = saved_filters

        # Rename the generic "commits" field to the company name.
        commits[company] = commits.pop("commits")

        if rank <= num_organizations:
            # Company kept with its own column in the dataset.
            summary = dict(summary.items() + commits.items())
        elif 'Others' not in summary:
            summary['Others'] = commits[company]
        else:
            # Aggregate the remaining companies element-wise into Others.
            summary['Others'] = [a + b for a, b in
                                 zip(summary['Others'], commits[company])]

    # TODO: remove global variables...
    summary = completePeriodIds(summary, period, startdate, enddate)
    return (summary)
def get_filter_items(filter_, startdate, enddate, identities_db):
    """Return the item list for an SCM filter, or None if unsupported
    or the metric is unavailable."""
    filter_name = filter_.get_name()
    # Change filter to GrimoireLib notation
    filter_name = filter_name.replace("+", MetricFilters.DELIMITER)

    metric_by_filter = {
        "repository": "repositories",
        "company": "organizations",
        "country": "countries",
        "domain": "domains",
        "project": "projects",
        "people2": "people2",
        "company" + MetricFilters.DELIMITER + "country": "organizations+countries",
        "company" + MetricFilters.DELIMITER + "project": "organizations+projects",
    }
    if filter_name not in metric_by_filter:
        logging.error("SCM " + filter_name + " not supported")
        return None

    metric = DataSource.get_metrics(metric_by_filter[filter_name], SCM)
    if metric is None:
        return None
    return metric.get_list()
def get_filter_items(filter_, startdate, enddate, identities_db):
    """Return the item list for an SCM filter, or None if unsupported
    or the metric is unavailable."""
    filter_name = filter_.get_name()
    # Change filter to GrimoireLib notation
    filter_name = filter_name.replace("+", MetricFilters.DELIMITER)

    metric_by_filter = {
        "repository": "repositories",
        "company": "organizations",
        "country": "countries",
        "domain": "domains",
        "project": "projects",
        "people2": "people2",
        "company" + MetricFilters.DELIMITER + "country": "organizations+countries",
        "company" + MetricFilters.DELIMITER + "project": "organizations+projects",
    }
    if filter_name not in metric_by_filter:
        logging.error("SCM " + filter_name + " not supported")
        return None

    metric = DataSource.get_metrics(metric_by_filter[filter_name], SCM)
    if metric is None:
        return None
    return metric.get_list()
def get_filter_items(cls, filter_, startdate, enddate, identities_db):
    """Return the item list for a filter of data source `cls`, or None
    when the filter is not supported."""
    filter_name = filter_.get_name()

    metric_by_filter = {
        "repository": "trackers",
        "company": "organizations",
        "country": "countries",
        "domain": "domains",
        "project": "projects",
        "people2": "people2",
        "company" + MetricFilters.DELIMITER + "country": "organizations+countries",
        "company" + MetricFilters.DELIMITER + "project": "organizations+projects",
    }
    if filter_name not in metric_by_filter:
        logging.error(filter_name + " not supported")
        return None

    metric = DataSource.get_metrics(metric_by_filter[filter_name], cls)
    return metric.get_list()
def get_filter_items(cls, filter_, startdate, enddate, identities_db):
    """Return the item list for a filter of data source `cls`, or None
    when the filter is not supported."""
    filter_name = filter_.get_name()

    metric_by_filter = {
        "repository": "trackers",
        "company": "organizations",
        "country": "countries",
        "domain": "domains",
        "project": "projects",
        "people2": "people2",
        "company" + MetricFilters.DELIMITER + "country": "organizations+countries",
        "company" + MetricFilters.DELIMITER + "project": "organizations+projects",
    }
    if filter_name not in metric_by_filter:
        logging.error(filter_name + " not supported")
        return None

    metric = DataSource.get_metrics(metric_by_filter[filter_name], cls)
    return metric.get_list()
def get_filter_items(filter_, startdate, enddate, identities_db):
    """Return the item list for an IRC filter (repositories or people2),
    or None when the filter is not supported."""
    items = None
    filter_name = filter_.get_name()
    if filter_name == "repository":
        items = DataSource.get_metrics("repositories", IRC).get_list()
    elif filter_name == "people2":
        items = DataSource.get_metrics("senders", IRC).get_list()
        # Normalize the field name expected by callers.
        items['name'] = items.pop('senders')
    else:
        logging.error("IRC " + filter_name + " not supported")
    return items
def get_filter_items(filter_, startdate, enddate, identities_db):
    """Return the item list for an IRC filter (repositories or people2),
    or None when the filter is not supported."""
    items = None
    filter_name = filter_.get_name()
    if filter_name == "repository":
        items = DataSource.get_metrics("repositories", IRC).get_list()
    elif filter_name == "people2":
        items = DataSource.get_metrics("senders", IRC).get_list()
        # Normalize the field name expected by callers.
        items['name'] = items.pop('senders')
    else:
        logging.error("IRC " + filter_name + " not supported")
    return items
def get_top_data(startdate, enddate, identities_db, filter_, npeople):
    """Return top contributors for Pullpo (global view only).

    Tops are computed for all time, the last month and the last year.
    Filters are not supported yet: None is returned in that case.
    """
    bots = Pullpo.get_bots()
    top_all = None
    # Fetch all metric objects up front, in a fixed order.
    metrics = [(name, DataSource.get_metrics(name, Pullpo))
               for name in ("reviewers", "submitters", "closers",
                            "mergers", "participants")]

    type_analysis = None
    if filter_ is not None:
        type_analysis = filter_.get_type_analysis()
    mfilter = MetricFilters(None, startdate, enddate, type_analysis, npeople)

    if filter_ is None:
        top_all = {}
        for name, metric in metrics:
            # Historical key quirk: the all-time "reviewers" entry has no
            # trailing dot, unlike every other metric.
            all_time_key = name if name == "reviewers" else name + "."
            top_all[all_time_key] = metric.get_list(mfilter, 0)
            top_all[name + '.last month'] = metric.get_list(mfilter, 31)
            top_all[name + '.last year'] = metric.get_list(mfilter, 365)
    else:
        logging.info("Pullpo does not support yet top for filters.")
    return (top_all)
def get_top_data_authors(startdate, enddate, i_db, filter_, npeople):
    """Top SCM authors (all time / last month / last year).

    Supported for the global view and for company/repository/project
    filters; other filters return an empty dict and log a message.
    """
    top = {}
    mauthors = DataSource.get_metrics("authors", SCM)
    if mauthors is None:
        return top

    type_analysis = None
    if filter_ is not None:
        type_analysis = filter_.get_type_analysis()
    mfilter = MetricFilters(None, startdate, enddate, type_analysis, npeople)
    mfilter.global_filter = mauthors.filters.global_filter

    if filter_ is None:
        top['authors.'] = mauthors.get_list(mfilter, 0)
        top['authors.last month'] = mauthors.get_list(mfilter, 31)
        top['authors.last year'] = mauthors.get_list(mfilter, 365)
    elif filter_.get_name() in ["company", "repository", "project"]:
        # The duplicated check is a deliberate performance toggle left by
        # the original authors (see inner comment).
        if filter_.get_name() in ["company", "repository", "project"]:
            top['authors.'] = mauthors.get_list(mfilter, 0)
            top['authors.last month'] = mauthors.get_list(mfilter, 31)
            top['authors.last year'] = mauthors.get_list(mfilter, 365)
        else:
            # If we have performance issues with tops, remove filters above
            # to avoid computing trends for tops
            top = mauthors.get_list(mfilter)
    else:
        logging.info("Top authors not support for " + filter_.get_name())
    return top
def get_list(self):
    """Top email domains, ranked by distinct closed issues.

    Domains are derived from the submitter email addresses; bot domains
    are excluded. Returns a dict whose 'name' entry holds the domains.
    """
    from vizgrimoire.data_source import DataSource
    from vizgrimoire.filter import Filter

    startdate = self.filters.startdate
    enddate = self.filters.enddate

    # Per-instance closed condition overrides the ITS default.
    closed_condition = ITS._get_closed_condition()
    if self.filters.closed_condition is not None:
        closed_condition = self.filters.closed_condition

    # Exclude bot domains from the ranking.
    bots = DataSource.get_filter_bots(Filter("domain"))
    fbots = "".join([" dom.name<>'" + bot + "' and " for bot in bots])

    tables = Set([])
    filters = Set([])
    tables.union_update(self.db.GetTablesDomains(self.db.identities_db))
    tables_str = self.db._get_tables_query(tables)
    filters.union_update(self.db.GetFiltersDomains())
    filters_str = self.db._get_filters_query(filters)

    q = "SELECT DISTINCT(SUBSTR(email,LOCATE('@',email)+1)) as domain "+\
        "FROM "+ tables_str + " "+\
        "WHERE " + filters_str +" AND "+\
        " "+ fbots +" "+\
        " c.changed_on >= "+ startdate+ " AND "+\
        " c.changed_on < "+ enddate+ " AND "+\
        " "+ closed_condition+" "+\
        "GROUP BY domain "+\
        "ORDER BY COUNT(DISTINCT(c.issue_id)) DESC, domain LIMIT " + str(Metrics.domains_limit)

    res = self.db.ExecuteQuery(q)
    # Normalize the field name expected by callers.
    res['name'] = res.pop('domain')
    return (res)
def GetClosedSummaryCompanies(period, startdate, enddate, identities_db, closed_condition, num_organizations):
    """Closed-issues evolution with one column per top company.

    Companies beyond num_organizations are summed into an "Others"
    column. `closed_condition` is kept for interface compatibility (it is
    not used directly; the "closed" metric applies its own condition).

    CONSISTENCY FIX: fetch the metric via DataSource.get_metrics, as every
    other summary helper in this module does, instead of ITS.get_metrics.
    """
    from vizgrimoire.ITS import ITS

    first_organizations = {}
    metric = DataSource.get_metrics("organizations", ITS)
    organizations = metric.get_list()
    organizations = organizations['name']

    count = 1
    for company in organizations:
        type_analysis = ["company", "'" + company + "'"]
        filter_com = MetricFilters(period, startdate, enddate, type_analysis)
        mclosed = DataSource.get_metrics("closed", ITS)
        mclosed.filters = filter_com
        closed = mclosed.get_ts()

        # Rename the generic "closed" field to the company name.
        closed[company] = closed.pop("closed")

        if count <= num_organizations:
            # Company kept with its own column in the dataset.
            first_organizations = dict(first_organizations.items() + closed.items())
        elif 'Others' not in first_organizations:
            first_organizations['Others'] = closed[company]
        else:
            # Aggregate the remaining companies element-wise into Others.
            first_organizations['Others'] = [a + b for a, b in
                zip(first_organizations['Others'], closed[company])]
        count = count + 1

    first_organizations = completePeriodIds(first_organizations, period, startdate, enddate)
    return (first_organizations)
def get_list(self):
    """Top registered domains (dom.name), ranked by distinct closed issues.

    Bot domains are excluded. Returns the raw query result dict.
    """
    from vizgrimoire.data_source import DataSource
    from vizgrimoire.filter import Filter

    startdate = self.filters.startdate
    enddate = self.filters.enddate

    # Per-instance closed condition overrides the ITS default.
    closed_condition = ITS._get_closed_condition()
    if self.filters.closed_condition is not None:
        closed_condition = self.filters.closed_condition

    # Exclude bot domains from the ranking.
    bots = DataSource.get_filter_bots(Filter("domain"))
    fbots = "".join([" dom.name<>'" + bot + "' and " for bot in bots])

    tables = Set([])
    filters = Set([])
    tables.union_update(self.db.GetTablesDomains(self.db.identities_db))
    tables.add(self.db.identities_db + ".domains dom")
    tables_str = self.db._get_tables_query(tables)
    filters.union_update(self.db.GetFiltersDomains())
    filters_str = self.db._get_filters_query(filters)

    q = "SELECT dom.name "+\
        "FROM "+ tables_str + " "+\
        "WHERE " + filters_str +" AND "+\
        " dom.id = upd.domain_id and "+\
        " "+ fbots +" "+\
        " c.changed_on >= "+ startdate+ " AND "+\
        " c.changed_on < "+ enddate+ " AND "+\
        " "+ closed_condition+" "+\
        "GROUP BY dom.name "+\
        "ORDER BY COUNT(DISTINCT(c.issue_id)) DESC LIMIT " + str(Metrics.domains_limit)

    return (self.db.ExecuteQuery(q))
def get_evolutionary_data(period, startdate, enddate, i_db, filter_=None):
    """Evolutionary metrics for DockerHubDS, plus studies when no filter
    is applied (studies do not support filters)."""
    metrics = DockerHubDS.get_metrics_data(period, startdate, enddate,
                                           i_db, filter_, True)
    if filter_ is None:
        studies = DataSource.get_studies_data(DockerHubDS, period,
                                              startdate, enddate, True)
    else:
        studies = {}
    return dict(metrics.items() + studies.items())
def GetSentSummaryCompanies(period, startdate, enddate, identities_db, num_organizations, projects_db):
    """Emails-sent evolution with one column per top company.

    Companies beyond num_organizations are summed into an "Others" column.

    BUG FIX: extract the company names from the 'name' entry of the
    organizations result, as the SCM/ITS summary helpers do. Iterating
    the get_list() dict directly would loop over its keys rather than
    the company names. (Assumes the MLS organizations metric returns the
    same {'name': [...]} shape as SCM/ITS — TODO confirm.)
    """
    count = 1
    first_organizations = {}
    metric = DataSource.get_metrics("organizations", MLS)
    organizations = metric.get_list()
    organizations = organizations['name']

    for company in organizations:
        type_analysis = ["company", "'" + company + "'"]
        sent = EvolEmailsSent(period, startdate, enddate, identities_db,
                              type_analysis, projects_db)
        sent = completePeriodIds(sent, period, startdate, enddate)

        # Rename the generic "sent" field to the company name.
        sent[company] = sent.pop("sent")

        if count <= num_organizations:
            # Company kept with its own column in the dataset.
            first_organizations = dict(first_organizations.items() + sent.items())
        elif 'Others' not in first_organizations:
            first_organizations['Others'] = sent[company]
        else:
            # Aggregate the remaining companies element-wise into Others.
            first_organizations['Others'] = [a + b for a, b in
                zip(first_organizations['Others'], sent[company])]
        count = count + 1

    first_organizations = completePeriodIds(first_organizations, period, startdate, enddate)
    return (first_organizations)
def get_top_data_authors(startdate, enddate, i_db, filter_, npeople):
    """Top SCM authors (all time / last month / last year).

    Supported for the global view and for company/repository/project
    filters; other filters return an empty dict and log a message.
    """
    top = {}
    mauthors = DataSource.get_metrics("authors", SCM)
    if mauthors is None:
        return top

    type_analysis = None
    if filter_ is not None:
        type_analysis = filter_.get_type_analysis()
    mfilter = MetricFilters(None, startdate, enddate, type_analysis, npeople)
    mfilter.global_filter = mauthors.filters.global_filter

    if filter_ is None:
        top['authors.'] = mauthors.get_list(mfilter, 0)
        top['authors.last month'] = mauthors.get_list(mfilter, 31)
        top['authors.last year'] = mauthors.get_list(mfilter, 365)
    elif filter_.get_name() in ["company", "repository", "project"]:
        # The duplicated check is a deliberate performance toggle left by
        # the original authors (see inner comment).
        if filter_.get_name() in ["company", "repository", "project"]:
            top['authors.'] = mauthors.get_list(mfilter, 0)
            top['authors.last month'] = mauthors.get_list(mfilter, 31)
            top['authors.last year'] = mauthors.get_list(mfilter, 365)
        else:
            # If we have performance issues with tops, remove filters above
            # to avoid computing trends for tops
            top = mauthors.get_list(mfilter)
    else:
        logging.info("Top authors not support for " + filter_.get_name())
    return top
def get_filter_items(filter_, startdate, enddate, identities_db):
    """Return the item list for a QAForums filter (tags for "repository",
    participants for "people2"), or None when the filter is unsupported.

    BUG FIX: the second branch was a separate "if", so a "repository"
    request also fell into the final "else" and logged a bogus
    "not supported" error; it is now an "elif".
    """
    items = None
    filter_name = filter_.get_name()
    # TODO: repository needs to be change to tag, once this is accepted as new
    # data source in VizGrimoireJS-lib
    if filter_name == "repository":
        metric = DataSource.get_metrics("tags", QAForums)
        items = metric.get_list()
    elif filter_name == "people2":
        metric = DataSource.get_metrics("participants", QAForums)
        items = metric.get_list()
    else:
        logging.error("QAForums " + filter_name + " not supported")
    return items
def get_filter_items(filter_, startdate, enddate, identities_db):
    """Return the item list for a QAForums filter (tags for "repository",
    participants for "people2"), or None when the filter is unsupported.

    BUG FIX: the second branch was a separate "if", so a "repository"
    request also fell into the final "else" and logged a bogus
    "not supported" error; it is now an "elif".
    """
    items = None
    filter_name = filter_.get_name()
    # TODO: repository needs to be change to tag, once this is accepted as new
    # data source in VizGrimoireJS-lib
    if filter_name == "repository":
        metric = DataSource.get_metrics("tags", QAForums)
        items = metric.get_list()
    elif filter_name == "people2":
        metric = DataSource.get_metrics("participants", QAForums)
        items = metric.get_list()
    else:
        logging.error("QAForums " + filter_name + " not supported")
    return items
def get_evolutionary_data(cls, period, startdate, enddate, identities_db, filter_=None):
    """Evolutionary metrics for data source `cls`, plus studies when no
    filter is applied (studies do not support filters).

    CLEANUP: removed the unused local `closed_condition =
    cls._get_closed_condition()` — its value was never read (the accessor
    appears side-effect free; it just returns a condition string
    elsewhere in this module).
    """
    metrics = cls.get_metrics_data(period, startdate, enddate,
                                   identities_db, filter_, True)
    if filter_ is not None:
        studies = {}
    else:
        studies = DataSource.get_studies_data(cls, period, startdate,
                                              enddate, True)
    return dict(metrics.items() + studies.items())
def get_evolutionary_data(period, startdate, enddate, i_db, filter_=None):
    """Evolutionary metrics for ReleasesDS, plus studies when no filter
    is applied (studies do not support filters)."""
    metrics = ReleasesDS.get_metrics_data(period, startdate, enddate,
                                          i_db, filter_, True)
    if filter_ is None:
        studies = DataSource.get_studies_data(ReleasesDS, period,
                                              startdate, enddate, True)
    else:
        studies = {}
    return dict(metrics.items() + studies.items())
def get_evolutionary_data(cls, period, startdate, enddate, identities_db, filter_=None):
    """Evolutionary metrics for data source `cls`, plus studies when no
    filter is applied (studies do not support filters).

    CLEANUP: removed the unused local `closed_condition =
    cls._get_closed_condition()` — its value was never read (the accessor
    appears side-effect free; it just returns a condition string
    elsewhere in this module).
    """
    metrics = cls.get_metrics_data(period, startdate, enddate,
                                   identities_db, filter_, True)
    if filter_ is not None:
        studies = {}
    else:
        studies = DataSource.get_studies_data(cls, period, startdate,
                                              enddate, True)
    return dict(metrics.items() + studies.items())
def get_evolutionary_data(period, startdate, enddate, identities_db, filter_=None):
    """Evolutionary metrics for Pullpo, plus studies when no filter is
    applied (studies do not support filters)."""
    metrics = Pullpo.get_metrics_data(period, startdate, enddate,
                                      identities_db, filter_, True)
    if filter_ is None:
        studies = DataSource.get_studies_data(Pullpo, period,
                                              startdate, enddate, True)
    else:
        studies = {}
    evol_data = dict(metrics.items() + studies.items())
    return evol_data
def __get_data(period, startdate, enddate, i_db, filter_, evol):
    """Metrics (evolutionary or aggregated, per `evol`) for QAForums,
    plus studies when no filter is applied."""
    metrics = QAForums.get_metrics_data(period, startdate, enddate,
                                        i_db, filter_, evol)
    if filter_ is None:
        studies = DataSource.get_studies_data(QAForums, period,
                                              startdate, enddate, evol)
    else:
        studies = {}
    return dict(metrics.items() + studies.items())
def GetCommitsSummaryCompanies(period, startdate, enddate, identities_db, num_organizations):
    """Commits evolution with one column per top company.

    Returned dataframe structure: unixtime, date, week/month/...,
    company1 ... company[num_organizations], Others. The first three
    fields are used for dating and ordering; companies beyond
    num_organizations are summed into the "Others" column.
    """
    from vizgrimoire.SCM import SCM

    organizations = DataSource.get_metrics("organizations", SCM).get_list()
    organizations = organizations['name']

    summary = {}
    for rank, company in enumerate(organizations, 1):
        type_analysis = ['company', "'" + company + "'"]
        mcommits = DataSource.get_metrics("commits", SCM)
        mfilter = MetricFilters(period, startdate, enddate, type_analysis)
        # Temporarily swap in the per-company filter, then restore.
        saved_filters = mcommits.filters
        mcommits.filters = mfilter
        commits = mcommits.get_ts()
        mcommits.filters = saved_filters

        # Rename the generic "commits" field to the company name.
        commits[company] = commits.pop("commits")

        if rank <= num_organizations:
            # Company kept with its own column in the dataset.
            summary = dict(summary.items() + commits.items())
        elif 'Others' not in summary:
            summary['Others'] = commits[company]
        else:
            # Aggregate the remaining companies element-wise into Others.
            summary['Others'] = [a + b for a, b in
                                 zip(summary['Others'], commits[company])]

    # TODO: remove global variables...
    summary = completePeriodIds(summary, period, startdate, enddate)
    return (summary)
def get_top_data(startdate, enddate, identities_db, filter_=None, npeople=None):
    """Top downloads data: anonymized IPs, packages, pages and countries.

    Filters are not supported yet; a filtered request logs a notice and
    returns an empty dict.
    """
    def anonymize(ip):
        # Mask the first two IPv4 octets; for IPv6 keep only the first
        # group. The IPv6 check runs last, matching the original order.
        octets = ip.split(".")
        masked = ip
        if len(octets) == 4:
            masked = "x.x." + octets[2] + "." + octets[3]
        groups = ip.split(":")
        if len(groups) > 1:
            masked = groups[0] + ":X"
        return masked

    def filter_ips(ips):
        # Rebuild the result with every address anonymized.
        return {'downloads': ips['downloads'],
                'ips': [anonymize(ip) for ip in ips['ips']]}

    top = {}
    mips = DataSource.get_metrics("ips", DownloadsDS)
    mpackages = DataSource.get_metrics("packages", DownloadsDS)
    mpages = DataSource.get_metrics("pages", DownloadsDS)
    mcountries = DataSource.get_metrics("countries", DownloadsDS)
    type_analysis = filter_.get_type_analysis() if filter_ is not None else None
    mfilter = MetricFilters(None, startdate, enddate, type_analysis, npeople)
    if filter_ is not None:
        logging.info("DownloadsDS does not support yet top for filters.")
        return top
    top['ips.'] = filter_ips(mips.get_list(mfilter, 0))
    top['packages.'] = mpackages.get_list(mfilter, 0)
    top['pages.'] = mpages.get_list()
    top['countries.'] = mcountries.get_list()
    return top
def get_top_data(startdate, enddate, identities_db, filter_, npeople):
    """Return top rsvps/events/groups lists for EventsDS.

    Each metric is reported for all time, the last month and the last
    year. For filtered requests the metric's stored filters are
    temporarily replaced by ad-hoc ones and restored afterwards; the
    groups top is produced only for non-repository filters.

    Fixes: the Python-2-only '<>' operator is replaced by '!=', and the
    repeated swap/collect/restore boilerplate is factored into a helper
    with try/finally so filters are restored even on error.
    """
    top = {}
    attendees = DataSource.get_metrics("rsvps", EventsDS)
    period = attendees.filters.period
    mfilter = attendees.filters
    if filter_ is not None:
        type_analysis = filter_.get_type_analysis()
        mfilter = MetricFilters(period, startdate, enddate,
                                type_analysis, npeople)

    def collect(metric, prefix):
        # Swap in the ad-hoc filters only for filtered requests (the
        # unfiltered path never touched metric.filters), and always
        # restore them afterwards.
        saved = metric.filters
        if filter_ is not None:
            metric.filters = mfilter
        try:
            top[prefix + '.'] = metric.get_list(mfilter, 0)
            top[prefix + '.last month'] = metric.get_list(mfilter, 31)
            top[prefix + '.last year'] = metric.get_list(mfilter, 365)
        finally:
            if filter_ is not None:
                metric.filters = saved

    collect(attendees, 'rsvps')
    collect(DataSource.get_metrics("events", EventsDS), 'events')
    if filter_ is not None and filter_.get_name() != 'repository':
        collect(DataSource.get_metrics("groups", EventsDS), 'groups')
    return top
def get_agg_data (period, startdate, enddate, identities_db, filter_ = None):
    """Aggregated Mediawiki metrics; only the people2 filter is supported.

    Any other filter logs a warning and yields {}. Study results are
    added only when no filter is requested.

    Fix: logging.warn is a deprecated alias — use logging.warning.
    """
    if filter_ is not None:
        if filter_.get_name() != "people2":
            logging.warning("Mediawiki only supports people2 filter.")
            return {}
    metrics = Mediawiki.get_metrics_data(period, startdate, enddate,
                                         identities_db, filter_, False)
    if filter_ is not None:
        studies = {}
    else:
        studies = DataSource.get_studies_data(Mediawiki, period,
                                              startdate, enddate, False)
    agg = dict(metrics)
    agg.update(studies)
    return agg
def get_filter_items(filter_, startdate, enddate, identities_db):
    """Return the item list for the given Pullpo filter, or None if the
    filter is not supported."""
    # Map the public filter name onto the metric that lists its items.
    metric_by_filter = {
        "repository": "repositories",
        "company": "organizations",
        "country": "countries",
        "project": "projects",
        "people2": "people2",
    }
    filter_name = filter_.get_name()
    metric_id = metric_by_filter.get(filter_name)
    if metric_id is None:
        logging.error("Pullpo " + filter_name + " not supported")
        return None
    return DataSource.get_metrics(metric_id, Pullpo).get_list()
def get_filter_items(filter_, startdate, enddate, identities_db):
    """EventsDS filter items; only 'repository' (backed by the groups
    metric) is supported — anything else logs an error and returns None."""
    filter_name = filter_.get_name()
    if filter_name != "repository":
        logging.error("EventsDS " + filter_name + " not supported")
        return None
    groups_metric = DataSource.get_metrics("groups", EventsDS)
    return groups_metric.get_list()
def get_filter_items(filter_, startdate, enddate, identities_db):
    """Resolve a Pullpo filter to its listing metric and return its items.

    Unsupported filter names are logged and yield None.
    """
    filter_name = filter_.get_name()
    # (filter name, metric id) pairs this data source can list.
    supported = (("repository", "repositories"),
                 ("company", "organizations"),
                 ("country", "countries"),
                 ("project", "projects"),
                 ("people2", "people2"))
    for name, metric_id in supported:
        if filter_name == name:
            return DataSource.get_metrics(metric_id, Pullpo).get_list()
    logging.error("Pullpo " + filter_name + " not supported")
    return None
def get_agg_data (period, startdate, enddate, identities_db, filter_ = None):
    """Aggregated MLS metrics, plus study results for unfiltered requests.

    Fix: removed the dead assignment `rfield = MLS.get_repo_field()` —
    the value was never used. get_repo_field appears to be a plain
    accessor; restore the call if it turns out to have side effects.
    """
    evolutionary = False
    metrics = MLS.get_metrics_data(period, startdate, enddate,
                                   identities_db, filter_, evolutionary)
    if filter_ is not None:
        studies = {}
    else:
        studies = DataSource.get_studies_data(MLS, period, startdate,
                                              enddate, evolutionary)
    agg = dict(metrics)
    agg.update(studies)
    return agg
def get_filter_items(filter_, startdate, enddate, identities_db):
    """Items for an EventsDS filter; only 'repository' is supported,
    in which case the groups metric provides the list."""
    items = None
    name = filter_.get_name()
    if name == "repository":
        items = DataSource.get_metrics("groups", EventsDS).get_list()
    else:
        logging.error("EventsDS " + name + " not supported")
    return items
def get_top_data (startdate, enddate, identities_db, filter_, npeople):
    """Top rsvps, events and (for filtered, non-repository requests)
    groups for EventsDS, over all-time / last-month / last-year windows.

    For filtered requests each metric's stored filters are temporarily
    replaced by ad-hoc ones and restored afterwards.

    Fixes: the Python-2-only '<>' operator is replaced with '!=' and the
    duplicated back-to-back `if filter_ is not None` blocks are merged.
    """
    top = {}
    attendees = DataSource.get_metrics("rsvps", EventsDS)
    period = attendees.filters.period
    mfilter = attendees.filters   # default: the metric's own filters
    filters = None                # holds the original filters while swapped
    if filter_ is not None:
        type_analysis = filter_.get_type_analysis()
        mfilter = MetricFilters(period, startdate, enddate,
                                type_analysis, npeople)
        filters = attendees.filters
        attendees.filters = mfilter
    top['rsvps.'] = attendees.get_list(mfilter, 0)
    top['rsvps.last month'] = attendees.get_list(mfilter, 31)
    top['rsvps.last year'] = attendees.get_list(mfilter, 365)
    if filter_ is not None:
        attendees.filters = filters
    events = DataSource.get_metrics("events", EventsDS)
    if filter_ is not None:
        filters = events.filters
        events.filters = mfilter
    top['events.'] = events.get_list(mfilter, 0)
    top['events.last month'] = events.get_list(mfilter, 31)
    top['events.last year'] = events.get_list(mfilter, 365)
    if filter_ is not None:
        events.filters = filters
    if filter_ is not None:
        groups = DataSource.get_metrics("groups", EventsDS)
        filters = groups.filters
        groups.filters = mfilter
        # Groups make no sense when already filtering by one repository.
        if filter_.get_name() != 'repository':
            top['groups.'] = groups.get_list(mfilter, 0)
            top['groups.last month'] = groups.get_list(mfilter, 31)
            top['groups.last year'] = groups.get_list(mfilter, 365)
        groups.filters = filters
    return top
def get_top_data(startdate, enddate, identities_db, filter_, npeople,
                 threads_top=True):
    """Top senders (and optionally longest threads) for MLS.

    Unfiltered requests report senders for all time / last month / last
    year plus, when threads_top is True, the longest threads over the
    same windows. Filtered requests report senders only for the
    company/domain/repository/country filters; other filters yield None.

    Fixes: "domain" appeared twice in the supported-filter list, the same
    membership test was nested inside itself (making its inner else
    unreachable), and the unused `item = filter_.get_item()` local was
    removed (get_item looks like a plain accessor — restore if it has
    side effects).
    """
    msenders = DataSource.get_metrics("senders", MLS)
    period = None
    type_analysis = None
    if filter_ is not None:
        type_analysis = filter_.get_type_analysis()
    mfilter = MetricFilters(period, startdate, enddate, type_analysis, npeople)
    top = {}
    if filter_ is None:
        top['senders.'] = msenders.get_list(mfilter, 0)
        top['senders.last month'] = msenders.get_list(mfilter, 31)
        top['senders.last year'] = msenders.get_list(mfilter, 365)
        if threads_top:
            top['threads.'] = MLS.getLongestThreads(
                startdate, enddate, identities_db, npeople)
            # Rolling windows: rebind startdate to today-365 / today-30
            # (quoted, as the query layer expects).
            startdate = datetime.date.today() - datetime.timedelta(days=365)
            startdate = "'" + str(startdate) + "'"
            top['threads.last year'] = MLS.getLongestThreads(
                startdate, enddate, identities_db, npeople)
            startdate = datetime.date.today() - datetime.timedelta(days=30)
            startdate = "'" + str(startdate) + "'"
            top['threads.last month'] = MLS.getLongestThreads(
                startdate, enddate, identities_db, npeople)
    else:
        filter_name = filter_.get_name()
        if filter_name in ["company", "domain", "repository", "country"]:
            top['senders.'] = msenders.get_list(mfilter, 0)
            top['senders.last month'] = msenders.get_list(mfilter, 31)
            top['senders.last year'] = msenders.get_list(mfilter, 365)
        else:
            top = None
    return top
def get_agg_data (period, startdate, enddate, identities_db, filter_= None):
    """Aggregated Pullpo metrics; study results and the static URL data
    are merged in only for unfiltered requests."""
    metrics = Pullpo.get_metrics_data(period, startdate, enddate,
                                      identities_db, filter_, False)
    agg = dict(metrics)
    if filter_ is None:
        agg.update(DataSource.get_studies_data(Pullpo, period,
                                               startdate, enddate, False))
        agg.update(Pullpo.get_url())
    return agg
def get_agg_data (period, startdate, enddate, identities_db, filter_ = None):
    """Aggregated EventsDS metrics.

    Unsupported filters log a warning and yield {}. Study results are
    merged in only when no filter is requested.

    Fix: logging.warn is a deprecated alias — use logging.warning.
    """
    if filter_ is not None:
        sf = EventsDS.get_supported_filters()
        if filter_.get_name() not in sf:
            logging.warning("EventsDS only supports " + ",".join(sf))
            return {}
    metrics = EventsDS.get_metrics_data(period, startdate, enddate,
                                        identities_db, filter_, False)
    if filter_ is not None:
        studies = {}
    else:
        studies = DataSource.get_studies_data(EventsDS, period,
                                              startdate, enddate, False)
    agg = dict(metrics)
    agg.update(studies)
    return agg
def get_evolutionary_data (period, startdate, enddate, identities_db, filter_ = None):
    """Time-series SCM metrics; study results are included only for
    unfiltered requests (and logged for inspection)."""
    metrics = SCM.get_metrics_data(period, startdate, enddate,
                                   identities_db, filter_, True)
    if filter_ is None:
        logging.info("Studies found for evolutionary report")
        studies = DataSource.get_studies_data(SCM, period, startdate,
                                              enddate, True)
        logging.info(studies)
    else:
        studies = {}
        logging.info("No studies found for evolutionary report")
    evol_data = dict(metrics)
    evol_data.update(studies)
    return evol_data
def get_agg_data (period, startdate, enddate, identities_db, filter_ = None):
    """Return aggregated EventsDS metric data.

    A filter outside EventsDS.get_supported_filters() logs a warning and
    returns {}. Studies are only computed for unfiltered requests.

    Fix: replaced the deprecated logging.warn alias with logging.warning.
    """
    if filter_ is not None:
        sf = EventsDS.get_supported_filters()
        if filter_.get_name() not in sf:
            logging.warning("EventsDS only supports " + ",".join(sf))
            return {}
    metrics = EventsDS.get_metrics_data(period, startdate, enddate,
                                        identities_db, filter_, False)
    if filter_ is None:
        studies = DataSource.get_studies_data(EventsDS, period,
                                              startdate, enddate, False)
    else:
        studies = {}
    result = dict(metrics)
    result.update(studies)
    return result
def get_filter_items(filter_, startdate, enddate, identities_db):
    """Return the item list for the given SCR filter, or None if the
    filter is not supported.

    Fixes: Report.get_config() was called three times (hoisted once),
    the unused `automator` local and the redundant `DS = SCR` alias were
    removed, and the if/elif chain became a lookup table.
    """
    metric_names = {
        "repository": "repositories",
        "company": "organizations",
        "country": "countries",
        "domain": "domains",
        "project": "projects",
        "people2": "people2",
    }
    filter_name = filter_.get_name()
    if filter_name not in metric_names:
        logging.error("SCR " + filter_name + " not supported")
        return None
    metric = DataSource.get_metrics(metric_names[filter_name], SCR)
    # workaround due to bug https://phabricator.wikimedia.org/T116484:
    # honor per-data-source date overrides from the automator config.
    from vizgrimoire.report import Report
    config_r = Report.get_config()['r']
    ds_name = SCR.get_name()
    if ds_name + "_start_date" in config_r:
        metric.filters.startdate = "'" + config_r[ds_name + "_start_date"] + "'"
    if ds_name + "_end_date" in config_r:
        metric.filters.enddate = "'" + config_r[ds_name + "_end_date"] + "'"
    # end
    return metric.get_list()
def get_filter_items(filter_, startdate, enddate, identities_db):
    """Mediawiki filter items; only 'people2' is supported, served by the
    authors metric. Unsupported filters log an error and return None."""
    filter_name = filter_.get_name()
    if filter_name != "people2":
        logging.error("Mediawiki " + filter_name + " not supported")
        return None
    items = DataSource.get_metrics("authors", Mediawiki).get_list()
    # Expose the author list under the generic 'name' key filters expect.
    items['name'] = items.pop('authors')
    return items
def get_top_data (startdate, enddate, identities_db, filter_ = None, npeople = None):
    """Top packages, pages and countries for DownloadsDS.

    Filters are not supported yet; a filtered request logs a notice and
    returns an empty dict.
    """
    top = {}
    mpackages = DataSource.get_metrics("packages", DownloadsDS)
    mpages = DataSource.get_metrics("pages", DownloadsDS)
    mcountries = DataSource.get_metrics("countries", DownloadsDS)
    type_analysis = filter_.get_type_analysis() if filter_ is not None else None
    # Built for parity with the other data sources; the lists below do
    # not take it as an argument.
    mfilter = MetricFilters(None, startdate, enddate, type_analysis, npeople)
    if filter_ is not None:
        logging.info("DownloadsDS does not support yet top for filters.")
        return top
    top['packages.'] = mpackages.get_list()
    top['pages.'] = mpages.get_list()
    top['countries.'] = mcountries.get_list()
    return top
def get_agg_data(period, startdate, enddate, identities_db, filter_=None):
    """Aggregated MLS metric data plus studies for unfiltered requests.

    Fix: dropped the unused `rfield = MLS.get_repo_field()` assignment.
    The call reads like a pure accessor for the repository field name;
    reinstate it if it proves to have side effects.
    """
    evolutionary = False
    metrics = MLS.get_metrics_data(period, startdate, enddate,
                                   identities_db, filter_, evolutionary)
    if filter_ is None:
        studies = DataSource.get_studies_data(MLS, period, startdate,
                                              enddate, evolutionary)
    else:
        studies = {}
    agg = dict(metrics)
    agg.update(studies)
    return agg
def get_agg_data(period, startdate, enddate, identities_db, filter_=None):
    """Aggregate Pullpo metrics; add study data and the repository URL
    only when no filter is requested."""
    agg = dict(Pullpo.get_metrics_data(period, startdate, enddate,
                                       identities_db, filter_, False))
    if filter_ is not None:
        return agg
    studies = DataSource.get_studies_data(Pullpo, period, startdate,
                                          enddate, False)
    agg.update(studies)
    agg.update(Pullpo.get_url())
    return agg
def get_agg_data (cls, period, startdate, enddate, identities_db, filter_ = None):
    """Aggregated metrics for the issue-tracking data source `cls`;
    studies and static URL data are merged in only when unfiltered."""
    # Result is unused, but the call may validate tracker configuration
    # as a side effect — TODO confirm before removing.
    closed_condition = cls._get_closed_condition()
    agg = dict(cls.get_metrics_data(period, startdate, enddate,
                                    identities_db, filter_, False))
    if filter_ is None:
        agg.update(DataSource.get_studies_data(cls, period, startdate,
                                               enddate, False))
        agg.update(cls.get_url())
    return agg
def get_evolutionary_data(period, startdate, enddate, identities_db, filter_=None):
    """Time-series Pullpo metrics; study data is included only for
    unfiltered requests."""
    evol = dict(Pullpo.get_metrics_data(period, startdate, enddate,
                                        identities_db, filter_, True))
    if filter_ is None:
        evol.update(DataSource.get_studies_data(Pullpo, period,
                                                startdate, enddate, True))
    return evol
def get_agg_data (cls, period, startdate, enddate, identities_db, filter_ = None):
    """Build the aggregated report dict for data source `cls`: metric
    data, plus studies and URL info when no filter is applied."""
    # Unused binding; the accessor might validate backend settings, so
    # the call is preserved — TODO confirm.
    closed_condition = cls._get_closed_condition()
    metrics = cls.get_metrics_data(period, startdate, enddate,
                                   identities_db, filter_, False)
    studies = ({} if filter_ is not None
               else DataSource.get_studies_data(cls, period, startdate,
                                                enddate, False))
    agg = dict(metrics)
    agg.update(studies)
    if filter_ is None:
        agg.update(cls.get_url())
    return agg
def get_top_data(startdate, enddate, identities_db, filter_, npeople):
    """Top contributor lists for QAForums (comment/answer/question
    senders and participants) over all-time / last-month / last-year.

    Filters are not supported yet; a filtered request logs a notice and
    returns an empty dict.
    """
    top = {}
    metrics = [(mid, DataSource.get_metrics(mid, QAForums))
               for mid in ("csenders", "asenders", "qsenders", "participants")]
    period = None
    type_analysis = None
    if filter_ is not None:
        type_analysis = filter_.get_type_analysis()
    mfilter = MetricFilters(period, startdate, enddate, type_analysis, npeople)
    if filter_ is not None:
        logging.info("QAForums does not support yet top for filters.")
        return top
    # Key layout: '<metric>.' for all time, '<metric>.last month', etc.
    windows = (("", 0), ("last month", 31), ("last year", 365))
    for mid, metric in metrics:
        for suffix, days in windows:
            top[mid + "." + suffix] = metric.get_list(mfilter, days)
    return top
def _get_data(period, startdate, enddate, i_db, filter_, evol):
    """Aggregate (evol=False) or time-series (evol=True) DownloadsDS data.

    Filters are unsupported: a filtered request logs a warning and
    returns {} immediately.

    Fixes: logging.warn (deprecated alias) replaced by logging.warning;
    removed the dead `type_analysis = [...]` assignment before the early
    return and the dead `if filter_ is not None` studies branch after it
    (filter_ is guaranteed None at that point).
    """
    data = {}
    if filter_ is not None:
        logging.warning(DownloadsDS.get_name() + " does not support filters.")
        return data
    type_analysis = None
    if evol:
        metrics_on = DownloadsDS.get_metrics_core_ts()
    else:
        metrics_on = DownloadsDS.get_metrics_core_agg()
    mfilter = MetricFilters(period, startdate, enddate, type_analysis)
    all_metrics = DownloadsDS.get_metrics_set(DownloadsDS)
    for item in all_metrics:
        if item.id not in metrics_on:
            continue
        # NOTE: item.filters is not restored afterwards (pre-existing
        # behavior, kept as-is).
        item.filters = mfilter
        mvalue = item.get_ts() if evol else item.get_agg()
        data.update(mvalue)
    if evol is False:
        init_date = DownloadsDS.get_date_init(startdate, enddate, None, type_analysis)
        end_date = DownloadsDS.get_date_end(startdate, enddate, None, type_analysis)
        data.update(init_date)
        data.update(end_date)
        # Tendencies over the last week / month / year.
        metrics_trends = DownloadsDS.get_metrics_core_trends()
        for days in [7, 30, 365]:
            for item in all_metrics:
                if item.id not in metrics_trends:
                    continue
                data.update(item.get_trends(enddate, days))
    studies = DataSource.get_studies_data(DownloadsDS, period, startdate,
                                          enddate, evol)
    data.update(studies)
    return data