def get_list(self):
    """Return aggregated closed items grouped per project.

    Temporarily swaps the Closed metric's filters for a project-scoped
    MetricFilters, runs the aggregate query, and then restores the
    original filters so the metric object is left untouched.
    """
    type_analysis = ['project', None]
    # period is irrelevant for an aggregated (non time series) query
    mfilter = MetricFilters(None, self.filters.startdate,
                            self.filters.enddate, type_analysis)
    mclosed = Closed(self.db, self.filters)
    mfilter_orig = mclosed.filters
    mclosed.filters = mfilter
    try:
        closed = mclosed.get_agg()
    finally:
        # Restore the original filters even if the query raises,
        # so the metric object is never left in a modified state.
        mclosed.filters = mfilter_orig
    checkListArray(closed)
    return closed
def get_list(self):
    """Return aggregated emails sent grouped per project.

    Temporarily swaps the EmailsSent metric's filters for a
    project-scoped MetricFilters, runs the aggregate query, and then
    restores the original filters so the metric object is untouched.
    """
    type_analysis = ['project', None]
    # period is irrelevant for an aggregated (non time series) query
    mfilter = MetricFilters(None, self.filters.startdate,
                            self.filters.enddate, type_analysis)
    msent = EmailsSent(self.db, self.filters)
    mfilter_orig = msent.filters
    msent.filters = mfilter
    try:
        sent = msent.get_agg()
    finally:
        # Restore the original filters even if the query raises.
        msent.filters = mfilter_orig
    checkListArray(sent)
    return sent
def get_list(self):
    """Return aggregated commits grouped per project.

    Temporarily swaps the Commits metric's filters for a
    project-scoped MetricFilters, runs the aggregate query, and then
    restores the original filters so the metric object is untouched.
    """
    type_analysis = ['project', None]
    # period is irrelevant for an aggregated (non time series) query
    mfilter = MetricFilters(None, self.filters.startdate,
                            self.filters.enddate, type_analysis)
    mcommits = Commits(self.db, self.filters)
    mfilter_orig = mcommits.filters
    mcommits.filters = mfilter
    try:
        commits = mcommits.get_agg()
    finally:
        # Restore the original filters even if the query raises.
        mcommits.filters = mfilter_orig
    checkListArray(commits)
    return commits
def _get_agg_all(self):
    """Return pending review/upload time aggregates for all items at once.

    Four datasets are computed: pending review time for all reviews,
    pending review time restricted to reviews waiting for a reviewer,
    and the same two measured from the last patch upload. For each
    dataset the median and average (in days) are reported per item.

    Returns a dict of metric-name -> list, each list aligned with
    ``time_to['name']`` (renamed to ``'url'`` for repository analysis).

    Bug fix: ``review_time_pending_upload_days_avg`` previously
    appended the *median* (``ttr_median_upload``) instead of the
    computed average; it now records the real average.
    """
    reviewers_pending = True  # variant: only reviews waiting for a reviewer
    identities_db = self.db.identities_db
    bots = []

    # First, we need to group the data by the filter field
    all_items = self.db.get_all_items(self.filters.type_analysis)
    group_field = self.db.get_group_field(all_items)
    id_field = group_field.split('.')[1]  # remove table name

    time_to = {
        "review_time_pending_days_median": [],
        "review_time_pending_days_avg": [],
        "review_time_pending_ReviewsWaitingForReviewer_days_median": [],
        "review_time_pending_ReviewsWaitingForReviewer_days_avg": [],
        "review_time_pending_upload_days_median": [],
        "review_time_pending_upload_days_avg": [],
        "review_time_pending_upload_ReviewsWaitingForReviewer_days_median": [],
        "review_time_pending_upload_ReviewsWaitingForReviewer_days_avg": []
    }

    q = self.db.GetTimeToReviewPendingQuerySQL(self.filters, identities_db,
                                               bots)
    ttr_data = self.db.ExecuteQuery(q)
    checkListArray(ttr_data)

    q = self.db.GetTimeToReviewPendingQuerySQL(self.filters, identities_db,
                                               bots, reviewers_pending)
    ttr_reviewers_data = self.db.ExecuteQuery(q)
    checkListArray(ttr_reviewers_data)

    q = self.db.GetTimeToReviewPendingQuerySQL(self.filters, identities_db,
                                               bots, False, True)
    ttr_upload_data = self.db.ExecuteQuery(q)
    checkListArray(ttr_upload_data)

    # This query is really slow.
    q = self.db.GetTimeToReviewPendingQuerySQL(self.filters, identities_db,
                                               bots, reviewers_pending, True)
    ttr_reviewers_upload_data = self.db.ExecuteQuery(q)
    checkListArray(ttr_reviewers_upload_data)

    # Build the global list of items found in any of the four datasets
    all_items_ids = []
    for items in [ttr_data, ttr_reviewers_data,
                  ttr_upload_data, ttr_reviewers_upload_data]:
        all_items_ids = list(set(items[id_field] + all_items_ids))
    time_to['name'] = all_items_ids

    def _item_stats(data, item):
        # Median and average revtime for one item; NaN when it has no rows.
        values = [data['revtime'][i]
                  for i in range(len(data[id_field]))
                  if data[id_field][i] == item]
        if not values:
            return float("nan"), float("nan")
        values = removeDecimals(values)
        return median(values), average(values)

    # (dataset, metric prefix) pairs; each fills <prefix>_median and
    # <prefix>_avg, keeping every list aligned with time_to['name'].
    datasets = [
        (ttr_data, "review_time_pending_days"),
        (ttr_reviewers_data,
         "review_time_pending_ReviewsWaitingForReviewer_days"),
        (ttr_upload_data, "review_time_pending_upload_days"),
        (ttr_reviewers_upload_data,
         "review_time_pending_upload_ReviewsWaitingForReviewer_days")
    ]
    for data, prefix in datasets:
        for item in time_to['name']:
            item_median, item_avg = _item_stats(data, item)
            time_to[prefix + "_median"].append(item_median)
            time_to[prefix + "_avg"].append(item_avg)

    # In SCR the item field name must be url for repository
    if self.filters.type_analysis[0] == 'repository':
        time_to['url'] = time_to.pop('name')
    return time_to
def _get_ts_all():
    """Get the time series metrics for all items at the same time.

    Builds, month by month, the accumulated pending-time medians and
    review counts for every item, completes the time series fields,
    and finally transposes the data from per-month lists into
    per-item time series.

    NOTE(review): this is a closure — it relies on names from the
    enclosing scope (self, months, start_month, get_sql,
    get_values_median); confirm before moving it.

    Fix: the ``metrics`` list was assigned twice with identical
    contents; the duplicate assignment has been removed.
    """
    all_items = self.db.get_all_items(self.filters.type_analysis)
    group_field = self.db.get_group_field(all_items)
    id_field = group_field.split('.')[1]  # remove table name

    metrics = ["review_time_pending_reviews",
               "review_time_pending_days_acc_median",
               "review_time_pending_upload_reviews",
               "review_time_pending_upload_days_acc_median",
               "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
               "review_time_pending_ReviewsWaitingForReviewer_reviews",
               "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
               "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"]

    # Used to store each month all items data
    acc_pending_time_median_month = {"month": [], "name": []}
    # Used to store the final format
    acc_pending_time_median = {"month": []}
    for metric in metrics:
        acc_pending_time_median_month[metric] = []
        acc_pending_time_median[metric] = []

    # months = 2 # to debug
    for i in range(0, months + 1):
        # Complete the skeleton of the data dict
        acc_pending_time_median_month["month"].append(start_month + i)
        acc_pending_time_median["month"].append(start_month + i)
        acc_pending_time_median_month["name"].append([])
        for metric in metrics:
            acc_pending_time_median_month[metric].append([])
            acc_pending_time_median[metric].append([])

    for i in range(0, months + 1):
        # First get all data from SQL
        newtime = self.db.ExecuteQuery(get_sql(start_month + i))
        uploadtime = self.db.ExecuteQuery(get_sql(start_month + i, False, True))
        # This is the slow query
        newtime_rev = self.db.ExecuteQuery(get_sql(start_month + i, True))
        uploadtime_rev = self.db.ExecuteQuery(get_sql(start_month + i, True, True))

        # Build a common list for all items seen this month
        all_items_month_ids = []
        for data_sql in [newtime, uploadtime, newtime_rev, uploadtime_rev]:
            checkListArray(data_sql)
            all_items_month_ids = list(set(data_sql[id_field] +
                                           all_items_month_ids))
        acc_pending_time_median_month["name"][i] = all_items_month_ids

        def _add_month_stats(data_sql, value_key, reviews_metric,
                             median_metric):
            # Record, per item, the number of reviews and the accumulated
            # median of value_key for the current month i.
            for item in all_items_month_ids:
                data_item = [data_sql[value_key][j]
                             for j in range(len(data_sql[id_field]))
                             if data_sql[id_field][j] == item]
                acc_pending_time_median_month[reviews_metric][i].append(
                    len(data_item))
                acc_pending_time_median_month[median_metric][i].append(
                    get_values_median(data_item))

        # review time
        _add_month_stats(newtime, 'newtime',
                         'review_time_pending_reviews',
                         'review_time_pending_days_acc_median')
        # upload time
        _add_month_stats(uploadtime, 'uploadtime',
                         'review_time_pending_upload_reviews',
                         'review_time_pending_upload_days_acc_median')
        # review time for reviews waiting for Reviewer
        _add_month_stats(newtime_rev, 'newtime',
                         'review_time_pending_ReviewsWaitingForReviewer_reviews',
                         'review_time_pending_ReviewsWaitingForReviewer_days_acc_median')
        # upload time for reviews waiting for Reviewer
        _add_month_stats(uploadtime_rev, 'uploadtime',
                         'review_time_pending_upload_ReviewsWaitingForReviewer_reviews',
                         'review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median')

    # Now we need to consolidate all names in a single list
    all_items = []
    for lnames in acc_pending_time_median_month['name']:
        all_items = list(set(lnames + all_items))

    # And now time to create the final version that should be completePeriod
    for item in all_items:
        # Add the ts for the item to the final dict
        for i in range(0, months + 1):
            mitems = acc_pending_time_median_month['name'][i]
            found_item = False
            for k in range(0, len(mitems)):
                if mitems[k] == item:
                    # Found the item, get all metrics for this month
                    found_item = True
                    for metric in metrics:
                        item_metric_month_value = \
                            acc_pending_time_median_month[metric][i][k]
                        acc_pending_time_median[metric][i].append(
                            item_metric_month_value)
            if not found_item:
                for metric in metrics:
                    # 0 reviews, 0 review time
                    acc_pending_time_median[metric][i].append(0)

    # Now we need to completePeriods to add time series fields.
    # All the time series are already complete because of the way they
    # are built, but we miss some time series fields.
    ts_fields = ['unixtime', 'date', 'month', 'id']
    ts_aux = {'month': acc_pending_time_median['month']}
    ts_aux = completePeriodIds(ts_aux, self.filters.period,
                               self.filters.startdate, self.filters.enddate)
    for field in ts_fields:
        acc_pending_time_median[field] = ts_aux[field]

    # After completing the time series, add the name/url series
    if self.filters.type_analysis[0] != "repository":
        acc_pending_time_median["name"] = all_items
    else:
        acc_pending_time_median["url"] = all_items

    # And now we need to adjust the format from
    # month:[M1, M2, M3], url:[URL1, URL2. ...],
    #   metric:[[URL1_M1,URL2_M1], [URL1_M2, URL2_M2],[URL1_M3...]...]
    # to
    # month:[M1, M2, M3], url:[URL1, URL2. ...],
    #   metric:[[URL1_M1, URL1_M2, URL1_M3],[URL2_M1...]...]
    time_to = {}
    for field in acc_pending_time_median:
        if field not in metrics:
            time_to[field] = acc_pending_time_median[field]
        else:
            # The new metric field has an array per item with the time series
            time_to[field] = []
            for i in range(0, len(all_items)):
                time_to[field].append([])
            for metrics_month in acc_pending_time_median[field]:
                for j in range(0, len(all_items)):
                    time_to[field][j].append(metrics_month[j])
    return time_to
def _get_agg_all(self):
    """Return pending review/upload time aggregates for all items at once.

    Computes four datasets — pending review time for all reviews,
    pending review time for reviews waiting for a reviewer, and both
    measured from the last patch upload — and reports the per-item
    median and average (in days) for each.

    Returns a dict of metric-name -> list, each list aligned with
    ``time_to['name']`` (renamed to ``'url'`` for repository analysis).

    Bug fix: ``review_time_pending_upload_days_avg`` previously
    appended ``ttr_median_upload`` (the median) instead of the
    computed average; it now records the real average.
    """
    reviewers_pending = True  # variant: only reviews waiting for a reviewer
    identities_db = self.db.identities_db
    bots = []

    # First, we need to group the data by the filter field
    all_items = self.db.get_all_items(self.filters.type_analysis)
    group_field = self.db.get_group_field(all_items)
    id_field = group_field.split('.')[1]  # remove table name

    time_to = {
        "review_time_pending_days_median": [],
        "review_time_pending_days_avg": [],
        "review_time_pending_ReviewsWaitingForReviewer_days_median": [],
        "review_time_pending_ReviewsWaitingForReviewer_days_avg": [],
        "review_time_pending_upload_days_median": [],
        "review_time_pending_upload_days_avg": [],
        "review_time_pending_upload_ReviewsWaitingForReviewer_days_median": [],
        "review_time_pending_upload_ReviewsWaitingForReviewer_days_avg": []
    }

    q = self.db.GetTimeToReviewPendingQuerySQL(self.filters, identities_db,
                                               bots)
    ttr_data = self.db.ExecuteQuery(q)
    checkListArray(ttr_data)

    q = self.db.GetTimeToReviewPendingQuerySQL(self.filters, identities_db,
                                               bots, reviewers_pending)
    ttr_reviewers_data = self.db.ExecuteQuery(q)
    checkListArray(ttr_reviewers_data)

    q = self.db.GetTimeToReviewPendingQuerySQL(self.filters, identities_db,
                                               bots, False, True)
    ttr_upload_data = self.db.ExecuteQuery(q)
    checkListArray(ttr_upload_data)

    # This query is really slow.
    q = self.db.GetTimeToReviewPendingQuerySQL(self.filters, identities_db,
                                               bots, reviewers_pending, True)
    ttr_reviewers_upload_data = self.db.ExecuteQuery(q)
    checkListArray(ttr_reviewers_upload_data)

    # Build the global list of items found in any of the four datasets
    all_items_ids = []
    for items in [ttr_data, ttr_reviewers_data,
                  ttr_upload_data, ttr_reviewers_upload_data]:
        all_items_ids = list(set(items[id_field] + all_items_ids))
    time_to['name'] = all_items_ids

    def _item_stats(data, item):
        # Median and average revtime for one item; NaN when it has no rows.
        values = [data['revtime'][i]
                  for i in range(len(data[id_field]))
                  if data[id_field][i] == item]
        if not values:
            return float("nan"), float("nan")
        values = removeDecimals(values)
        return median(values), average(values)

    # (dataset, metric prefix) pairs; each fills <prefix>_median and
    # <prefix>_avg, keeping every list aligned with time_to['name'].
    datasets = [
        (ttr_data, "review_time_pending_days"),
        (ttr_reviewers_data,
         "review_time_pending_ReviewsWaitingForReviewer_days"),
        (ttr_upload_data, "review_time_pending_upload_days"),
        (ttr_reviewers_upload_data,
         "review_time_pending_upload_ReviewsWaitingForReviewer_days")
    ]
    for data, prefix in datasets:
        for item in time_to['name']:
            item_median, item_avg = _item_stats(data, item)
            time_to[prefix + "_median"].append(item_median)
            time_to[prefix + "_avg"].append(item_avg)

    # In SCR the item field name must be url for repository
    if self.filters.type_analysis[0] == 'repository':
        time_to['url'] = time_to.pop('name')
    return time_to
def _get_ts_all():
    """Get the time series metrics for all items at the same time.

    Month by month, accumulates the pending-time medians and review
    counts for every item, completes the time series fields, and
    finally transposes the result from per-month lists into per-item
    time series.

    NOTE(review): this is a closure — it relies on names from the
    enclosing scope (self, months, start_month, get_sql,
    get_values_median); confirm before moving it.

    Fix: the ``metrics`` list was assigned twice with identical
    contents; the duplicate assignment has been removed.
    """
    all_items = self.db.get_all_items(self.filters.type_analysis)
    group_field = self.db.get_group_field(all_items)
    id_field = group_field.split('.')[1]  # remove table name

    metrics = [
        "review_time_pending_reviews",
        "review_time_pending_days_acc_median",
        "review_time_pending_upload_reviews",
        "review_time_pending_upload_days_acc_median",
        "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
        "review_time_pending_ReviewsWaitingForReviewer_reviews",
        "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
        "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"
    ]

    # Used to store each month all items data
    acc_pending_time_median_month = {"month": [], "name": []}
    # Used to store the final format
    acc_pending_time_median = {"month": []}
    for metric in metrics:
        acc_pending_time_median_month[metric] = []
        acc_pending_time_median[metric] = []

    # months = 2 # to debug
    for i in range(0, months + 1):
        # Complete the skeleton of the data dict
        acc_pending_time_median_month["month"].append(start_month + i)
        acc_pending_time_median["month"].append(start_month + i)
        acc_pending_time_median_month["name"].append([])
        for metric in metrics:
            acc_pending_time_median_month[metric].append([])
            acc_pending_time_median[metric].append([])

    for i in range(0, months + 1):
        # First get all data from SQL
        newtime = self.db.ExecuteQuery(get_sql(start_month + i))
        uploadtime = self.db.ExecuteQuery(
            get_sql(start_month + i, False, True))
        # This is the slow query
        newtime_rev = self.db.ExecuteQuery(get_sql(start_month + i, True))
        uploadtime_rev = self.db.ExecuteQuery(
            get_sql(start_month + i, True, True))

        # Build a common list for all items seen this month
        all_items_month_ids = []
        for data_sql in [newtime, uploadtime, newtime_rev, uploadtime_rev]:
            checkListArray(data_sql)
            all_items_month_ids = list(set(data_sql[id_field] +
                                           all_items_month_ids))
        acc_pending_time_median_month["name"][i] = all_items_month_ids

        def _add_month_stats(data_sql, value_key, reviews_metric,
                             median_metric):
            # Record, per item, the number of reviews and the accumulated
            # median of value_key for the current month i.
            for item in all_items_month_ids:
                data_item = [data_sql[value_key][j]
                             for j in range(len(data_sql[id_field]))
                             if data_sql[id_field][j] == item]
                acc_pending_time_median_month[reviews_metric][i].append(
                    len(data_item))
                acc_pending_time_median_month[median_metric][i].append(
                    get_values_median(data_item))

        # review time
        _add_month_stats(newtime, 'newtime',
                         'review_time_pending_reviews',
                         'review_time_pending_days_acc_median')
        # upload time
        _add_month_stats(uploadtime, 'uploadtime',
                         'review_time_pending_upload_reviews',
                         'review_time_pending_upload_days_acc_median')
        # review time for reviews waiting for Reviewer
        _add_month_stats(newtime_rev, 'newtime',
                         'review_time_pending_ReviewsWaitingForReviewer_reviews',
                         'review_time_pending_ReviewsWaitingForReviewer_days_acc_median')
        # upload time for reviews waiting for Reviewer
        _add_month_stats(uploadtime_rev, 'uploadtime',
                         'review_time_pending_upload_ReviewsWaitingForReviewer_reviews',
                         'review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median')

    # Now we need to consolidate all names in a single list
    all_items = []
    for lnames in acc_pending_time_median_month['name']:
        all_items = list(set(lnames + all_items))

    # And now time to create the final version that should be completePeriod
    for item in all_items:
        # Add the ts for the item to the final dict
        for i in range(0, months + 1):
            mitems = acc_pending_time_median_month['name'][i]
            found_item = False
            for k in range(0, len(mitems)):
                if mitems[k] == item:
                    # Found the item, get all metrics for this month
                    found_item = True
                    for metric in metrics:
                        item_metric_month_value = \
                            acc_pending_time_median_month[metric][i][k]
                        acc_pending_time_median[metric][i].append(
                            item_metric_month_value)
            if not found_item:
                for metric in metrics:
                    # 0 reviews, 0 review time
                    acc_pending_time_median[metric][i].append(0)

    # Now we need to completePeriods to add time series fields.
    # All the time series are already complete because of the way they
    # are built, but we miss some time series fields.
    ts_fields = ['unixtime', 'date', 'month', 'id']
    ts_aux = {'month': acc_pending_time_median['month']}
    ts_aux = completePeriodIds(ts_aux, self.filters.period,
                               self.filters.startdate, self.filters.enddate)
    for field in ts_fields:
        acc_pending_time_median[field] = ts_aux[field]

    # After completing the time series, add the name/url series
    if self.filters.type_analysis[0] != "repository":
        acc_pending_time_median["name"] = all_items
    else:
        acc_pending_time_median["url"] = all_items

    # And now we need to adjust the format from
    # month:[M1, M2, M3], url:[URL1, URL2. ...],
    #   metric:[[URL1_M1,URL2_M1], [URL1_M2, URL2_M2],[URL1_M3...]...]
    # to
    # month:[M1, M2, M3], url:[URL1, URL2. ...],
    #   metric:[[URL1_M1, URL1_M2, URL1_M3],[URL2_M1...]...]
    time_to = {}
    for field in acc_pending_time_median:
        if field not in metrics:
            time_to[field] = acc_pending_time_median[field]
        else:
            # The new metric field has an array per item with the time series
            time_to[field] = []
            for i in range(0, len(all_items)):
                time_to[field].append([])
            for metrics_month in acc_pending_time_median[field]:
                for j in range(0, len(all_items)):
                    time_to[field][j].append(metrics_month[j])
    return time_to