Example #1
0
    def get_ts(self):
        """Return the review BMI (backlog management index) time series.

        The score per period is (abandoned + merged) / submitted reviews.
        The returned dict reuses the completed "abandoned" series as its
        skeleton (period ids, dates, ...) with a "bmiscr" value list.
        """
        ts_abandoned = Abandoned(self.db, self.filters).get_ts()
        ts_abandoned = completePeriodIds(ts_abandoned, self.filters.period,
                                         self.filters.startdate, self.filters.enddate)
        ts_merged = Merged(self.db, self.filters).get_ts()
        ts_merged = completePeriodIds(ts_merged, self.filters.period,
                                      self.filters.startdate, self.filters.enddate)
        ts_submitted = Submitted(self.db, self.filters).get_ts()
        ts_submitted = completePeriodIds(ts_submitted, self.filters.period,
                                         self.filters.startdate, self.filters.enddate)

        # numpy arrays (as float) make the element-wise arithmetic fast
        arr_abandoned = numpy.array(ts_abandoned["abandoned"]).astype(float)
        arr_merged = numpy.array(ts_merged["merged"]).astype(float)
        arr_submitted = numpy.array(ts_submitted["submitted"]).astype(float)

        scores = (arr_abandoned + arr_merged) / arr_submitted

        # Reuse the abandoned dict as the result container, swapping the
        # raw counts for the computed BMI score list.
        result = ts_abandoned
        result.pop("abandoned")
        result["bmiscr"] = list(scores)

        return result
Example #2
0
def GetSentSummaryCompanies (period, startdate, enddate, identities_db, num_organizations, projects_db):
    # Build a summary time series of emails sent per company: the first
    # num_organizations companies keep their own named series, the rest
    # are aggregated element-wise into an 'Others' series.
    count = 1
    first_organizations = {}

    metric = DataSource.get_metrics("organizations", MLS)
    organizations = metric.get_list()

    for company in organizations:
        type_analysis = ["company", "'"+company+"'"]
        sent = EvolEmailsSent(period, startdate, enddate, identities_db, type_analysis, projects_db)
        sent = completePeriodIds(sent, period, startdate, enddate)
        # Rename field sent to company name
        sent[company] = sent["sent"]
        del sent['sent']

        if (count <= num_organizations):
            #Case of organizations with entity in the dataset
            # NOTE: dict(a.items() + b.items()) is a Python 2 only merge
            # (items() returns lists there); b's keys override a's.
            first_organizations = dict(first_organizations.items() + sent.items())
        else :
            #Case of organizations that are aggregated in the field Others
            if 'Others' not in first_organizations:
                first_organizations['Others'] = sent[company]
            else:
                # Element-wise sum of the per-period values
                first_organizations['Others'] = [a+b for a, b in zip(first_organizations['Others'],sent[company])]
        count = count + 1

    first_organizations = completePeriodIds(first_organizations, period, startdate, enddate)

    return(first_organizations)
Example #3
0
    def get_ts(self):
        """Return the review BMI (backlog management index) time series.

        Score per period = (abandoned + merged) / submitted reviews.
        The completed "abandoned" dict is reused as the result container,
        its raw counts replaced by a "bmiscr" list.
        """
        abandoned_reviews = Abandoned(self.db, self.filters)
        merged_reviews = Merged(self.db, self.filters)
        submitted_reviews = Submitted(self.db, self.filters)

        abandoned = abandoned_reviews.get_ts()
        abandoned = completePeriodIds(abandoned, self.filters.period, self.filters.startdate,
                                      self.filters.enddate)
        # casting the type of the variable in order to use numpy
        # faster way to deal with datasets...
        abandoned_array = numpy.array(abandoned["abandoned"])

        merged = merged_reviews.get_ts()
        merged = completePeriodIds(merged, self.filters.period, self.filters.startdate,
                                      self.filters.enddate)
        merged_array = numpy.array(merged["merged"])

        submitted = submitted_reviews.get_ts()
        submitted = completePeriodIds(submitted, self.filters.period, self.filters.startdate,
                                      self.filters.enddate)
        submitted_array = numpy.array(submitted["submitted"])

        # Element-wise BMI; division by zero follows numpy semantics
        bmi_array = (abandoned_array.astype(float) + merged_array.astype(float)) / submitted_array.astype(float)

        bmi = abandoned
        bmi.pop("abandoned")
        bmi["bmiscr"] = list(bmi_array)

        return bmi
Example #4
0
    def ticketsTimeToResponseByField(self, period, startdate, enddate, closed_condition, field, values_set):
        """Build response-time series (first action, first comment, time
        to close) per value of issue field *field*.

        Returns a single dict merging one `tfa_<v>`, `tfc_<v>` and
        `ttc_<v>` median/avg series per value in *values_set*.
        """
        condition = "AND i." + field + " = '%s'"
        evol = {}

        for field_value in values_set:
            field_condition = condition % field_value

            # Time to first action for this field value
            fa_alias = 'tfa_%s' % field_value
            data = self.GetTimeToFirstAction(period, startdate, enddate, field_condition, fa_alias)
            # A single-period result comes back as a scalar; normalise to lists
            if not isinstance(data[fa_alias], (list)): 
                data[fa_alias] = [data[fa_alias]]
                data['date'] = [data['date']]
            if len(data[fa_alias]) == 0: continue
            time_to_fa = self.getMedianAndAvg(period, fa_alias, data['date'], data[fa_alias])
            time_to_fa = completePeriodIds(time_to_fa, period, startdate, enddate)

            # Time to first comment
            fc_alias = 'tfc_%s' % field_value
            data = self.GetTimeToFirstComment(period, startdate, enddate, field_condition, fc_alias)
            if not isinstance(data[fc_alias], (list)): 
                data[fc_alias] = [data[fc_alias]]
                data['date'] = [data['date']]
            time_to_fc = self.getMedianAndAvg(period, fc_alias, data['date'], data[fc_alias])
            time_to_fc = completePeriodIds(time_to_fc, period, startdate, enddate)

            # Time until the ticket was closed
            tclosed_alias = 'ttc_%s' % field_value
            data = self.GetTimeClosed(period, startdate, enddate, closed_condition, field_condition, tclosed_alias)
            if not isinstance(data[tclosed_alias], (list)): 
                data[tclosed_alias] = [data[tclosed_alias]]
                data['date'] = [data['date']]
            time_closed = self.getMedianAndAvg(period, tclosed_alias, data['date'], data[tclosed_alias])
            time_closed = completePeriodIds(time_closed, period, startdate, enddate)

            # NOTE: dict(a.items() + b.items() + ...) is a Python 2 only merge
            evol = dict(evol.items() + time_to_fa.items() + time_to_fc.items() + time_closed.items())
        return evol
    def result(self, data_source, destdir = None):
        """Write SCR community JSON reports into *destdir*.

        Produces: new/gone code contributors, per-person submission
        activity for new and gone submitters, and the people-intake
        evolution. No-op unless data_source is SCR and destdir is set.
        """
        from vizgrimoire.SCR import SCR
        if data_source != SCR or destdir is None: return

        period = self.filters.period
        startdate = self.filters.startdate
        enddate = self.filters.enddate

        # Contributors that recently joined
        code_contrib = {}
        code_contrib["submitters"] = self.GetNewSubmitters()
        code_contrib["mergers"] = self.GetNewMergers()
        code_contrib["abandoners"] = self.GetNewAbandoners()
        createJSON(code_contrib, destdir+"/scr-code-contrib-new.json")

        # Contributors that left
        code_contrib = {}
        code_contrib["submitters"] = self.GetGoneSubmitters()
        code_contrib["mergers"] = self.GetGoneMergers()
        code_contrib["abandoners"] = self.GetGoneAbandoners()
        createJSON(code_contrib, destdir+"/scr-code-contrib-gone.json")


        # Per-person submission activity for new submitters
        data = self.GetNewSubmittersActivity()
        evol = {}
        evol['people'] = {}
        for uuid in data['uuid']:
            pdata = self.db.GetPeopleEvolSubmissionsSCR(uuid, period, startdate, enddate)
            pdata = completePeriodIds(pdata, period, startdate, enddate)
            evol['people'][uuid] = {"submissions":pdata['submissions']}
            # Just to have the time series data
            # NOTE: dict(a.items() + b.items()) is a Python 2 only merge
            evol = dict(evol.items() + pdata.items())
        if 'changes' in evol:
            del evol['changes'] # closed (metrics) is included in people
        createJSON(evol, destdir+"/new-people-activity-scr-evolutionary.json")

        # Same report for submitters that are gone
        data = self.GetGoneSubmittersActivity()
        evol = {}
        evol['people'] = {}
        for uuid in data['uuid']:
            pdata = self.db.GetPeopleEvolSubmissionsSCR(uuid, period, startdate, enddate)
            pdata = completePeriodIds(pdata, period, startdate, enddate)
            evol['people'][uuid] = {"submissions":pdata['submissions']}
            # Just to have the time series data
            evol = dict(evol.items() + pdata.items())
        if 'changes' in evol:
            del evol['changes'] # closed (metrics) is included in people
        createJSON(evol, destdir+"/gone-people-activity-scr-evolutionary.json")

        # data = GetPeopleLeaving()
        # createJSON(data, destdir+"/leaving-people-scr.json")

        # People intake, bucketed by submission count ranges
        evol = {}
        data = completePeriodIds(self.db.GetPeopleIntake(0,1), period, startdate, enddate)
        evol[period] = data[period]
        evol['id'] = data['id']
        evol['date'] = data['date']
        evol['num_people_1'] = data['people']
        evol['num_people_1_5'] = completePeriodIds(self.db.GetPeopleIntake(1,5),period, startdate, enddate)['people']
        evol['num_people_5_10'] = completePeriodIds(self.db.GetPeopleIntake(5,10), period, startdate, enddate)['people']
        createJSON(evol, destdir+"/scr-people-intake-evolutionary.json")
Example #6
0
    def ticketsTimeOpenedByType(self, period, startdate, enddate, closed_condition, result_type):
        """Time series of how long tickets stayed open.

        result_type selects the measured event: 'action', 'comment', or
        anything else for the plain opened-time series.
        """
        # Period ids covering the requested range, plus one extra period
        period_ids = completePeriodIds({period: []}, period, startdate, enddate)[period]
        period_ids.append(period_ids[-1] + 1)

        # SQL alias for the resulting series, keyed on result_type
        alias_by_type = {'action': "topened_tfa", 'comment': "topened_tfc"}
        alias = alias_by_type.get(result_type, "topened")

        ts = self.getTicketsTimeOpened(period, period_ids, closed_condition, result_type, alias)
        return completePeriodIds(ts, period, startdate, enddate)
Example #7
0
    def create_filter_report(filter_, period, startdate, enddate, destdir, npeople, identities_db):
        """Write the QAForums JSON reports for every item of *filter_*.

        Creates the item-name list file plus, per item, one evolutionary
        and one aggregated JSON file under *destdir*. Returns early when
        no items are found.
        """
        from vizgrimoire.report import Report
        items = Report.get_items()
        if items is None:
            items = QAForums.get_filter_items(filter_, startdate, enddate, identities_db)
            if items is None:  # was `items == None`; identity test is the idiom
                return
            items = items['name']

        if not isinstance(items, list):
            items = [items]

        # Names starting with '.' get a '_' prefix so they are safe as
        # file names (same effect as the old per-item regex match).
        file_items = ["_" + item if item.startswith(".") else item
                      for item in items]

        fn = os.path.join(destdir, filter_.get_filename(QAForums()))
        createJSON(file_items, fn)
        for item in items:
            logging.info(item)
            filter_item = Filter(filter_.get_name(), item)

            evol_data = QAForums.get_evolutionary_data(period, startdate, enddate, identities_db, filter_item)
            fn = os.path.join(destdir, filter_item.get_evolutionary_filename(QAForums()))
            createJSON(completePeriodIds(evol_data, period, startdate, enddate), fn)

            agg = QAForums.get_agg_data(period, startdate, enddate, identities_db, filter_item)
            fn = os.path.join(destdir, filter_item.get_static_filename(QAForums()))
            createJSON(agg, fn)
Example #8
0
    def get_ts(self):
        """Return the unique-visitors time series from visits_month.

        Only the 'month' period is supported (the table is monthly).

        Raises:
            ValueError: if the configured period is not 'month'.
        """
        if self.filters.period != 'month':
            msg = 'Period %s not valid. Currently, only "month" is supported' % \
                self.filters.period
            # Bug fix: the original *returned* the ValueError instance,
            # so callers silently received an exception object instead of
            # a time series. Raise it instead.
            raise ValueError(msg)

        fields = Set([])
        tables = Set([])
        filters = Set([])

        fields.add("unique_visitors uvisitors")
        tables.add("visits_month v")

        query = self.db.BuildQuery(self.filters.period,
                                   self.filters.startdate,
                                   self.filters.enddate,
                                   "v.date",
                                   fields,
                                   tables,
                                   filters,
                                   True,
                                   self.filters.type_analysis,
                                   strict=True)

        ts = self.db.ExecuteQuery(query)
        ts = completePeriodIds(ts, self.filters.period, self.filters.startdate,
                               self.filters.enddate)
        return ts
Example #9
0
    def create_filter_report(filter_, period, startdate, enddate, destdir,
                             npeople, identities_db):
        """Write the IRC JSON reports for every item of *filter_*.

        Creates the item list file plus, per item, one evolutionary and
        one aggregated JSON file under *destdir*. Returns early when no
        items are found.
        """
        from vizgrimoire.report import Report
        items = Report.get_items()
        if items is None:
            items = IRC.get_filter_items(filter_, startdate, enddate,
                                         identities_db)
            # was `items == None`; identity comparison is the idiom
            if items is None:
                return

        if not isinstance(items, list):
            items = [items]

        fn = os.path.join(destdir, filter_.get_filename(IRC()))
        createJSON(items, fn)

        for item in items:
            logging.info(item)

            filter_item = Filter(filter_.get_name(), item)

            evol_data = IRC.get_evolutionary_data(period, startdate, enddate,
                                                  identities_db, filter_item)
            fn = os.path.join(destdir,
                              filter_item.get_evolutionary_filename(IRC()))
            createJSON(
                completePeriodIds(evol_data, period, startdate, enddate), fn)

            agg = IRC.get_agg_data(period, startdate, enddate, identities_db,
                                   filter_item)
            fn = os.path.join(destdir, filter_item.get_static_filename(IRC()))
            createJSON(agg, fn)
Example #10
0
    def create_filter_report(filter_, period, startdate, enddate, destdir, npeople, identities_db):
        """Write the item list plus per-item evolutionary and aggregated
        IRC JSON reports under *destdir*."""
        from vizgrimoire.report import Report

        items = Report.get_items()
        if items is None:
            items = IRC.get_filter_items(filter_, startdate, enddate, identities_db)
            if items is None:
                return

        if not isinstance(items, list):
            items = [items]

        # Item-name list file for this filter
        createJSON(items, os.path.join(destdir, filter_.get_filename(IRC())))

        for entry in items:
            logging.info(entry)

            item_filter = Filter(filter_.get_name(), entry)

            # Evolutionary (time series) report
            evol = IRC.get_evolutionary_data(period, startdate, enddate, identities_db, item_filter)
            evol = completePeriodIds(evol, period, startdate, enddate)
            createJSON(evol, os.path.join(destdir, item_filter.get_evolutionary_filename(IRC())))

            # Aggregated (static) report
            agg = IRC.get_agg_data(period, startdate, enddate, identities_db, item_filter)
            createJSON(agg, os.path.join(destdir, item_filter.get_static_filename(IRC())))
Example #11
0
def fill_items(items, data, id_field, evol = False,
               period = None, startdate = None, enddate = None):
    """ Complete data dict items filling with 0 not existing items """
    from vizgrimoire.GrimoireUtils import completePeriodIds

    # This fields should not be modified
    ts_fields = [period, 'unixtime', 'date','id']

    if evol:
        # Filler used for items missing from evolutionary data
        # (presumably an all-zero series produced by completePeriodIds
        # from the empty input — TODO confirm)
        zero_ts = completePeriodIds({id_field:[],period:[]},
                                    period, startdate, enddate)[id_field]
    fields = data.keys()  # Python 2: keys() is a list, so remove() works
    if id_field not in fields:
        logging.info("[fill_items] " + id_field + " not found in " + ",".join(data))
        return data
    fields.remove(id_field)
    for id in items:
        if id not in data[id_field]:
            # Register the missing item and pad every non-ts field
            data[id_field].append(id)
            for field in fields:
                if field in ts_fields: continue
                if not evol:
                    data[field].append(0)
                if evol:
                    data[field].append(zero_ts)
    return data
Example #12
0
def GetClosedSummaryCompanies (period, startdate, enddate, identities_db, closed_condition, num_organizations):
    # Summary time series of closed tickets per company: the first
    # num_organizations companies keep their own named series, the rest
    # are summed element-wise into an 'Others' series.

    from vizgrimoire.ITS import ITS

    count = 1
    first_organizations = {}

    metric = DataSource.get_metrics("organizations", ITS)
    organizations = metric.get_list()
    organizations = organizations['name']

    for company in organizations:
        type_analysis = ["company", "'"+company+"'"]
        filter_com = MetricFilters(period, startdate, enddate, type_analysis)
        mclosed = ITS.get_metrics("closed", ITS)
        mclosed.filters = filter_com
        closed = mclosed.get_ts()
        # Rename field closed to company name
        closed[company] = closed["closed"]
        del closed['closed']

        if (count <= num_organizations):
            #Case of organizations with entity in the dataset
            # NOTE: dict(a.items() + b.items()) is a Python 2 only merge
            first_organizations = dict(first_organizations.items() + closed.items())
        else :
            #Case of organizations that are aggregated in the field Others
            if 'Others' not in first_organizations:
                first_organizations['Others'] = closed[company]
            else:
                # Element-wise sum of the per-period values
                first_organizations['Others'] = [a+b for a, b in zip(first_organizations['Others'],closed[company])]
        count = count + 1
    first_organizations = completePeriodIds(first_organizations, period, startdate, enddate)

    return(first_organizations)
Example #13
0
    def _complete_period_ids_items(data, id_field, period, startdate, enddate):
        """Complete the per-item time series in *data*.

        Each metric's i-th value (one per entry of data[id_field]) is run
        through completePeriodIds individually, so all metrics end up on
        the same completed period axis; generic series fields such as
        unixtime are merged into the result.

        Raises:
            Exception: if id_field is missing from data.
        """
        ts = {}
        # Complete the time series and share the date series
        metrics = data.keys()  # Python 2: keys() is a list, remove() works
        if id_field not in data:
            raise Exception(id_field + " not in " + str(data))

        metrics.remove(id_field)
        metrics.remove(period)
        ts[id_field] = data[id_field]
        for metric in metrics:
            ts[metric] = []

        for i in range (0, len(ts[id_field])):
            for metric in metrics:
                # Standard time series for each metric and convert
                metric_ts = {}
                metric_ts[metric] = data[metric][i]
                metric_ts[period] = data[period][i]
                metric_ts = completePeriodIds(metric_ts, period, startdate, enddate)
                ts[metric].append(metric_ts[metric])
                # Add additional time series fields: unixtime
                metric_ts.pop(metric) # just time series generic fields
                ts = dict(ts.items()+metric_ts.items())  # Python 2 only merge
        return ts
Example #14
0
    def get_ts (self):
        """Returns a time series of a specific class

        A timeseries consists of a unixtime date, labels, some other
        fields and the data of the specific instantiated class per
        period. This is built on a hash table.

        This also returns a proper timeseries with a 0-filled array
        if needed.

        """
        ts = self.db.ExecuteQuery(self._get_sql(True))

        analysis = self.filters.type_analysis
        if analysis and analysis[1] is None:
            # Grouped query: build one completed series per item of the
            # analysis field.
            group_field = self.db.get_group_field(analysis[0])
            if 'CONCAT' not in group_field:
                group_field = group_field.split('.')[1]  # remove table name
            ts = Metrics._convert_group_to_ts(ts, group_field)
            return Metrics._complete_period_ids_items(
                ts, group_field, self.filters.period,
                self.filters.startdate, self.filters.enddate)

        # Plain query: complete the single series directly
        return completePeriodIds(ts, self.filters.period,
                                 self.filters.startdate, self.filters.enddate)
Example #15
0
    def _complete_period_ids_items(data, id_field, period, startdate, enddate):
        """Complete every per-item metric series on a shared period axis.

        Each metric's i-th value is completed individually with
        completePeriodIds; generic series fields (e.g. unixtime) are
        merged into the returned dict.

        Raises:
            Exception: if id_field is missing from data.
        """
        ts = {}
        # Complete the time series and share the date series
        metrics = data.keys()  # Python 2: keys() is a list, remove() works
        if id_field not in data:
            raise Exception(id_field + " not in " + str(data))

        metrics.remove(id_field)
        metrics.remove(period)
        ts[id_field] = data[id_field]
        for metric in metrics:
            ts[metric] = []

        for i in range (0, len(ts[id_field])):
            for metric in metrics:
                # Standard time series for each metric and convert
                metric_ts = {}
                metric_ts[metric] = data[metric][i]
                metric_ts[period] = data[period][i]
                metric_ts = completePeriodIds(metric_ts, period, startdate, enddate)
                ts[metric].append(metric_ts[metric])
                # Add additional time series fields: unixtime
                metric_ts.pop(metric) # just time series generic fields
                ts = dict(ts.items()+metric_ts.items())  # Python 2 only merge
        return ts
Example #16
0
    def result(self):
        """Return a time series with one field per distinct issue status.

        The first state's completed series provides the shared skeleton
        (period ids, dates, ...); every state's change counts are stored
        under the state name instead of the generic "changes" field.
        """
        # Removed unused locals: fields/tables/filters Set([]) were never
        # read in this method.
        query = """select distinct(new_value) as states
                   from changes
                   where field = 'Status' """
        states = self.db.ExecuteQuery(query)

        data = {}
        for state in states["states"]:
            query = self._sql(state)
            state_data = self.db.ExecuteQuery(query)
            state_data = completePeriodIds(state_data, self.filters.period,
                                           self.filters.startdate, self.filters.enddate)
            if not data:
                # First state: keep the whole completed series skeleton
                data = state_data
                data[state] = data["changes"]
                data.pop("changes") # remove not needed data
            else:
                data[state] = state_data["changes"]

        # TODO: Hardcoded creation of file
        #createJSON(data, "../../../../json/its-changes.json")

        return data
Example #17
0
    def result(self):
        """Return (and dump to JSON) the time series of 'Resolved'
        status changes.

        Only the 'Resolved' state is processed; the completed series of
        that state provides the skeleton (period ids, dates, ...).
        """
        # Removed unused locals: fields/tables/filters Set([]) were never
        # read in this method.
        query = """select distinct(new_value) as states
                   from changes
                   where field = 'Status' """
        states = self.db.ExecuteQuery(query)

        data = {}
        for state in states["states"]:
            # `<>` is Python 2 only syntax; `!=` is the equivalent
            if state != 'Resolved': continue
            query = self._sql(state)
            state_data = self.db.ExecuteQuery(query)
            state_data = completePeriodIds(state_data, self.filters.period,
                                           self.filters.startdate,
                                           self.filters.enddate)
            if not data:
                data = state_data
                data[state] = data["changes"]
                data.pop("changes")  # remove not needed data
            else:
                data[state] = state_data["changes"]

        # TODO: Hardcoded creation of file
        createJSON(data, "../../../../json/its-changes.json")

        return data
Example #18
0
    def get_person_evol(cls, uuid, period, startdate, enddate, identities_db,
                        type_analysis):
        """Return the completed evolution of ITS activity for one person,
        identified by *uuid*."""
        ts = GetPeopleEvolITS(uuid, period, startdate, enddate,
                              cls._get_closed_condition())
        return completePeriodIds(ts, period, startdate, enddate)
Example #19
0
    def create_filter_report(filter_, period, startdate, enddate, destdir,
                             npeople, identities_db):
        """Write the EventsDS JSON reports for every item of *filter_*.

        Creates the item-name list, one evolutionary and one aggregated
        JSON per item, the top report, and — for the repository filter —
        a summary list with events/rsvps of the last 365 days.
        """
        from vizgrimoire.report import Report
        items = Report.get_items()
        if items is None:
            items = EventsDS.get_filter_items(filter_, startdate, enddate,
                                              identities_db)
        if items is None:  # was `items == None`; identity test is the idiom
            return

        filter_name = filter_.get_name()
        # NOTE(review): items['name'] is taken even when items came from
        # Report.get_items() — presumably both sources share this shape;
        # verify against callers.
        items = items['name']

        if not isinstance(items, list):
            items = [items]

        # Names starting with '.' get a '_' prefix so they are safe as
        # file names (same effect as the old per-item regex match).
        file_items = ["_" + item if item.startswith(".") else item
                      for item in items]

        fn = os.path.join(destdir, filter_.get_filename(EventsDS()))
        createJSON(file_items, fn)

        # Bug fix: `filter_name in ("repository")` tested substring
        # membership in a plain string (e.g. "repo" matched too); a
        # one-element tuple gives exact membership.
        if filter_name in ("repository",):
            items_list = {'name': [], 'events_365': [], 'rsvps_365': []}
        else:
            items_list = items

        for item in items:
            logging.info(item)
            filter_item = Filter(filter_.get_name(), item)

            evol_data = EventsDS.get_evolutionary_data(period, startdate,
                                                       enddate, identities_db,
                                                       filter_item)
            fn = os.path.join(
                destdir, filter_item.get_evolutionary_filename(EventsDS()))
            createJSON(
                completePeriodIds(evol_data, period, startdate, enddate), fn)

            agg = EventsDS.get_agg_data(period, startdate, enddate,
                                        identities_db, filter_item)
            fn = os.path.join(destdir,
                              filter_item.get_static_filename(EventsDS()))
            createJSON(agg, fn)

            if filter_name in ("repository",):
                items_list['name'].append(item.replace('/', '_'))
                items_list['events_365'].append(agg['events_365'])
                items_list['rsvps_365'].append(agg['rsvps_365'])

        EventsDS.create_filter_report_top(filter_, period, startdate, enddate,
                                          destdir, npeople, identities_db)

        fn = os.path.join(destdir, filter_.get_filename(EventsDS()))
        createJSON(items_list, fn)
Example #20
0
    def get_current_states(self, states):
        """Return one merged dict with a completed time series per state
        in *states* (each query contributes its own fields)."""
        current_states = {}

        for state in states:
            query = self.__get_sql_current__(state, True)
            data = self.db.ExecuteQuery(query)
            data = completePeriodIds(data, self.filters.period,
                                     self.filters.startdate, self.filters.enddate)
            # NOTE: dict(a.items() + b.items()) is a Python 2 only merge
            current_states = dict(current_states.items() + data.items())

        return current_states
Example #21
0
    def get_current_states(self, states):
        """Return one merged dict with a completed time series per state
        in *states*."""
        merged = {}

        for state in states:
            ts = self.db.ExecuteQuery(self.__get_sql_current__(state, True))
            ts = completePeriodIds(ts, self.filters.period,
                                   self.filters.startdate, self.filters.enddate)
            # Fold this state's fields into the accumulated result
            # (later states override earlier ones on key collisions,
            # matching the original dict-merge semantics).
            merged.update(ts)

        return merged
Example #22
0
    def get_ts(self):
        """Completed time series of added and removed lines, with both
        series coerced to lists of floats."""
        data = self.db.ExecuteQuery(self._get_sql(True))

        # A single-period result comes back as a scalar; normalise each
        # series to a list, then coerce its values to float.
        for key in ('removed_lines', 'added_lines'):
            values = data[key]
            if not isinstance(values, list):
                values = [values]
            data[key] = [float(v) for v in values]

        return completePeriodIds(data, self.filters.period,
                                 self.filters.startdate, self.filters.enddate)
Example #23
0
def GetCommitsSummaryCompanies(period, startdate, enddate, identities_db,
                               num_organizations):
    # This function returns the following dataframe structrure
    # unixtime, date, week/month/..., company1, company2, ... company[num_organizations -1], others
    # The 3 first fields are used for data and ordering purposes
    # The "companyX" fields are those that provide info about that company
    # The "Others" field is the aggregated value of the rest of the organizations
    # Companies above num_organizations will be aggregated in Others
    # NOTE(review): identities_db is not referenced in the active code
    # below (only in the commented-out EvolCommits call); kept for API
    # symmetry with the other summary functions.

    from vizgrimoire.SCM import SCM

    metric = DataSource.get_metrics("organizations", SCM)
    organizations = metric.get_list()
    organizations = organizations['name']

    first_organizations = {}
    count = 1
    for company in organizations:
        company_name = "'" + company + "'"
        type_analysis = ['company', company_name]
        mcommits = DataSource.get_metrics("commits", SCM)
        # Temporarily swap the metric's filters for this company's, then
        # restore them so the shared metric object is left untouched.
        mfilter = MetricFilters(period, startdate, enddate, type_analysis)
        mfilter_orig = mcommits.filters
        mcommits.filters = mfilter
        commits = mcommits.get_ts()
        mcommits.filters = mfilter_orig
        # commits = EvolCommits(period, startdate, enddate, identities_db, ["company", company_name])
        # commits = completePeriodIds(commits, period, startdate, enddate)
        # Rename field commits to company name
        commits[company] = commits["commits"]
        del commits['commits']

        if (count <= num_organizations):
            #Case of organizations with entity in the dataset
            # NOTE: dict(a.items() + b.items()) is a Python 2 only merge
            first_organizations = dict(first_organizations.items() +
                                       commits.items())
        else:
            #Case of organizations that are aggregated in the field Others
            if 'Others' not in first_organizations:
                first_organizations['Others'] = commits[company]
            else:
                # Element-wise sum of the per-period values
                first_organizations['Others'] = [
                    a + b for a, b in zip(first_organizations['Others'],
                                          commits[company])
                ]
        count = count + 1

    #TODO: remove global variables...
    first_organizations = completePeriodIds(first_organizations, period,
                                            startdate, enddate)
    return (first_organizations)
Example #24
0
    def ticketsTimeOpenedByField(self, period, startdate, enddate, closed_condition, field, values_set, result_type):
        """Time tickets stayed open, one series per value of *field*.

        result_type selects the measured event ('action', 'comment' or
        anything else for the plain opened series); the matching alias
        prefix is used for each field value's series.
        """
        condition = "AND " + field + " = '%s'"
        evol = {}

        # Build a set of dates
        dates = completePeriodIds({period : []}, period, startdate, enddate)[period]
        dates.append(dates[-1] + 1) # add one more month

        for field_value in values_set:
            field_condition = condition % field_value

            if result_type == 'action':
                alias = "topened_tfa_%s" % field_value
            elif result_type == 'comment':
                alias = "topened_tfc_%s" % field_value
            else:
                alias = "topened_%s" % field_value

            time_opened = self.getTicketsTimeOpened(period, dates, closed_condition, result_type, alias, field_condition)
            time_opened = completePeriodIds(time_opened, period, startdate, enddate)
            # NOTE: dict(a.items() + b.items()) is a Python 2 only merge
            evol = dict(evol.items() + time_opened.items())

        return evol
Example #25
0
def GetSentSummaryCompanies(period, startdate, enddate, identities_db,
                            num_organizations, projects_db):
    """Summary of emails sent per company.

    The first `num_organizations` organizations keep an individual series;
    the remaining ones are accumulated into an 'Others' series.
    """
    summary = {}

    orgs_metric = DataSource.get_metrics("organizations", MLS)
    for rank, company in enumerate(orgs_metric.get_list(), start=1):
        type_analysis = ["company", "'" + company + "'"]
        sent = EvolEmailsSent(period, startdate, enddate, identities_db,
                              type_analysis, projects_db)
        sent = completePeriodIds(sent, period, startdate, enddate)
        # The generic "sent" series is renamed after the company
        sent[company] = sent.pop("sent")

        if rank <= num_organizations:
            # Organizations shown individually in the dataset
            summary.update(sent)
        elif 'Others' not in summary:
            # First aggregated organization seeds the 'Others' series
            summary['Others'] = sent[company]
        else:
            summary['Others'] = [
                x + y for x, y in zip(summary['Others'], sent[company])
            ]

    return completePeriodIds(summary, period, startdate, enddate)
Example #26
0
    def get_ts(self):
        """Return a monthly time series of still-unanswered posts.

        For every month in the configured range, fetch that month's messages
        and count the thread starters that never received a reply within the
        month. Returns {} for unsupported analysis types, None for
        non-month periods, otherwise a completePeriodIds()-completed dict.
        """
        period = self.filters.period

        # Bug fix: ("repository") is just the string "repository", so the
        # original check tested *substring* membership (e.g. "repo" slipped
        # through); the trailing comma makes it a real one-element tuple.
        if (self.filters.type_analysis
                and self.filters.type_analysis[0] not in ("repository",)):
            return {}

        if (period != "month"):
            logging.error("Period not supported in " + self.id + " " + period)
            return None

        startdate = self.filters.startdate
        enddate = self.filters.enddate

        # Filter dates come quoted ('YYYY-MM-DD'); strptime strips the quotes
        start = datetime.strptime(startdate, "'%Y-%m-%d'")
        end = datetime.strptime(enddate, "'%Y-%m-%d'")

        # Months are encoded as year*12 + month (0-based), the scheme used
        # by the __get_date_from_month helper
        start_month = (start.year * 12 + start.month) - 1
        end_month = (end.year * 12 + end.month) - 1
        months = end_month - start_month + 2
        num_unanswered = {'month': [], 'unanswered_posts': []}

        for i in range(0, months):
            unanswered = []
            current_month = start_month + i
            from_date = self.__get_date_from_month(current_month)
            to_date = self.__get_date_from_month(current_month + 1)
            messages = self.__get_messages(from_date, to_date)

            for message in messages:
                message_id = message[0]
                response_of = message[1]

                # Thread starters stay pending until a reply shows up
                if response_of is None:
                    unanswered.append(message_id)
                    continue

                # A reply resolves its parent if the parent is still pending
                if response_of in unanswered:
                    unanswered.remove(response_of)

            num_unanswered['month'].append(current_month)
            num_unanswered['unanswered_posts'].append(len(unanswered))

        return completePeriodIds(num_unanswered, self.filters.period,
                                 self.filters.startdate, self.filters.enddate)
Example #27
0
    def get_ts(self):
        """Compute how many posts remain unanswered at the end of each month.

        Walks every month in the configured range, loads that month's
        messages and counts the ones never answered inside the month.
        Returns {} for unsupported analysis types, None for non-month
        periods, otherwise a completePeriodIds()-completed time series.
        """
        period = self.filters.period

        # Bug fix: ("repository") is a plain string, so membership was a
        # substring test; ("repository",) is the intended one-element tuple.
        if (self.filters.type_analysis and self.filters.type_analysis[0] not in ("repository",)):
            return {}

        if (period != "month"):
            logging.error("Period not supported in " + self.id + " " + period)
            return None

        startdate = self.filters.startdate
        enddate = self.filters.enddate

        # Filter dates are stored quoted ('YYYY-MM-DD')
        start = datetime.strptime(startdate, "'%Y-%m-%d'")
        end = datetime.strptime(enddate, "'%Y-%m-%d'")

        # Months encoded as year*12 + month (0-based)
        start_month = (start.year * 12 + start.month) - 1
        end_month = (end.year * 12 + end.month) - 1
        months = end_month - start_month + 2
        num_unanswered = {'month' : [],
                          'unanswered_posts' : []}

        for i in range(0, months):
            unanswered = []
            current_month = start_month + i
            from_date = self.__get_date_from_month(current_month)
            to_date = self.__get_date_from_month(current_month + 1)
            messages = self.__get_messages(from_date, to_date)

            for message in messages:
                message_id = message[0]
                response_of = message[1]

                # Thread starters are pending until a reply arrives
                if response_of is None:
                    unanswered.append(message_id)
                    continue

                # A reply resolves its parent if the parent is still pending
                if response_of in unanswered:
                    unanswered.remove(response_of)

            num_unanswered['month'].append(current_month)
            num_unanswered['unanswered_posts'].append(len(unanswered))

        return completePeriodIds(num_unanswered, self.filters.period,
                                 self.filters.startdate, self.filters.enddate)
Example #28
0
    def create_filter_report(filter_, period, startdate, enddate, destdir, npeople, identities_db):
        """Create the JSON report files for every item of `filter_`.

        Writes, under `destdir`: the list of item names, one evolutionary
        and one static JSON file per item, and -- for the repository filter
        -- a summary with events/rsvps counters for the last 365 days.
        """
        from vizgrimoire.report import Report
        items = Report.get_items()
        if items is None:
            items = EventsDS.get_filter_items(filter_, startdate, enddate, identities_db)
        if (items == None): return

        filter_name = filter_.get_name()
        items = items['name']

        if not isinstance(items, list):
            items = [items]

        # Item names starting with a dot are prefixed with "_" so they do
        # not produce hidden files on disk.
        file_items = []
        for item in items:
            if re.compile("^\..*").match(item) is not None: item = "_"+item
            file_items.append(item)

        fn = os.path.join(destdir, filter_.get_filename(EventsDS()))
        createJSON(file_items, fn)

        # NOTE(review): ("repository") is a plain string, so this is a
        # substring test rather than tuple membership. It works for the
        # exact value "repository" but should probably be ("repository",).
        if filter_name in ("repository"):
            items_list = {'name' : [], 'events_365' : [], 'rsvps_365' : []}
        else:
            items_list = items

        for item in items:
            logging.info(item)
            filter_item = Filter(filter_.get_name(), item)

            evol_data = EventsDS.get_evolutionary_data(period, startdate, enddate, identities_db, filter_item)
            fn = os.path.join(destdir, filter_item.get_evolutionary_filename(EventsDS()))
            createJSON(completePeriodIds(evol_data, period, startdate, enddate), fn)

            agg = EventsDS.get_agg_data(period, startdate, enddate, identities_db, filter_item)
            fn = os.path.join(destdir, filter_item.get_static_filename(EventsDS()))
            createJSON(agg, fn)

            if filter_name in ("repository"):
                items_list['name'].append(item.replace('/', '_'))
                items_list['events_365'].append(agg['events_365'])
                items_list['rsvps_365'].append(agg['rsvps_365'])

        EventsDS.create_filter_report_top(filter_, period, startdate, enddate, destdir, npeople, identities_db)

        # This overwrites the plain names file written above with the
        # (possibly enriched) items_list summary.
        fn = os.path.join(destdir, filter_.get_filename(EventsDS()))
        createJSON(items_list, fn)
Example #29
0
def fill_items(items,
               data,
               id_field,
               evol=False,
               period=None,
               startdate=None,
               enddate=None):
    """ Complete data dict items filling with 0 not existing items """
    from vizgrimoire.GrimoireUtils import completePeriodIds

    # This fields should not be modified
    ts_fields = [period, 'unixtime', 'date', 'id']

    if evol:
        zero_ts = completePeriodIds({
            id_field: [],
            period: []
        }, period, startdate, enddate)[id_field]
    fields = data.keys()
    if id_field not in fields:
        logging.info("[fill_items] " + id_field + " not found in " +
                     ",".join(data))
        return data
    fields.remove(id_field)

    if not isinstance(items, list):
        items_ids = [items]
    else:
        items_ids = items

    if type(data[id_field]) != list:
        data[id_field] = [data[id_field]]

    for id in items_ids:
        if id not in data[id_field]:
            data[id_field].append(id)
            for field in fields:
                if type(data[field]) != list:
                    data[field] = [data[field]]

                if field in ts_fields: continue
                if not evol:
                    data[field].append(0)
                if evol:
                    data[field].append(zero_ts)
    return data
Example #30
0
    def get_ts(self):
        """Time series of added/removed lines.

        These two fields need float coercion that the generic metrics
        get_ts() does not provide, hence this override.
        """
        data = self.db.ExecuteQuery(self._get_sql(True))

        for key in ('removed_lines', 'added_lines'):
            raw = data[key]
            if not isinstance(raw, list):
                raw = [raw]
            data[key] = [float(value) for value in raw]

        return completePeriodIds(data, self.filters.period,
                                 self.filters.startdate, self.filters.enddate)
Example #31
0
def GetCommitsSummaryCompanies(period, startdate, enddate, identities_db, num_organizations):
    """Evolution of commits per company.

    Returns a dataframe-like dict: the time series fields (unixtime, date,
    week/month/...) plus one series per company. Only the first
    `num_organizations` companies keep an individual series; the rest are
    summed into an 'Others' series.
    """
    from vizgrimoire.SCM import SCM

    org_names = DataSource.get_metrics("organizations", SCM).get_list()['name']

    summary = {}
    rank = 0
    for company in org_names:
        rank += 1
        type_analysis = ['company', "'" + company + "'"]
        mcommits = DataSource.get_metrics("commits", SCM)
        # Swap in a per-company filter, restoring the original afterwards
        saved_filters = mcommits.filters
        mcommits.filters = MetricFilters(period, startdate, enddate, type_analysis)
        commits = mcommits.get_ts()
        mcommits.filters = saved_filters

        # Rename the generic "commits" series after the company
        commits[company] = commits.pop("commits")

        if rank <= num_organizations:
            # Companies shown individually in the dataset
            summary.update(commits)
        elif 'Others' not in summary:
            summary['Others'] = commits[company]
        else:
            summary['Others'] = [x + y for x, y in
                                 zip(summary['Others'], commits[company])]

    return completePeriodIds(summary, period, startdate, enddate)
Example #32
0
def genDates(period, startdate, enddate):
    """Generate an empty time series skeleton for `period`.

    Metric classes use this to iterate over the periods of the range and
    fill in their own data.
    """
    skeleton = createTimeSeries({})
    skeleton.pop('id')
    skeleton[period] = []

    skeleton = completePeriodIds(skeleton, period, startdate, enddate)

    # Drop the zero placeholders left by completePeriodIds
    for field in ('date', 'unixtime'):
        skeleton[field] = [value for value in skeleton[field] if value != 0]

    return skeleton
Example #33
0
def genDates(period, startdate, enddate):
    """Build an empty per-period time series between two dates.

    Intended for metrics that walk every period in the range and populate
    the structure themselves.
    """
    ts = createTimeSeries({})
    del ts['id']
    ts[period] = []

    ts = completePeriodIds(ts, period, startdate, enddate)

    # completePeriodIds pads with zeros; strip them from the label fields
    ts['date'] = [d for d in ts['date'] if d != 0]
    ts['unixtime'] = [d for d in ts['unixtime'] if d != 0]

    return ts
Example #34
0
    def create_filter_report(filter_, period, startdate, enddate, destdir,
                             npeople, identities_db):
        """Create the JSON report files for every item of `filter_`.

        Writes under `destdir` the list of item names plus one evolutionary
        and one static JSON file per item.
        """
        from vizgrimoire.report import Report
        items = Report.get_items()
        if items is None:
            items = QAForums.get_filter_items(filter_, startdate, enddate,
                                              identities_db)
            if items == None:
                return
            # NOTE(review): the 'name' column is extracted only in this
            # branch; presumably Report.get_items() already returns plain
            # names -- confirm against other data sources (EventsDS
            # extracts it unconditionally).
            items = items['name']

        # NOTE(review): filter_name is computed but never used below.
        filter_name = filter_.get_name()

        if not isinstance(items, list):
            items = [items]

        # Item names starting with a dot are prefixed with "_" so they do
        # not produce hidden files on disk.
        file_items = []
        for item in items:
            if re.compile("^\..*").match(item) is not None: item = "_" + item
            file_items.append(item)

        fn = os.path.join(destdir, filter_.get_filename(QAForums()))
        createJSON(file_items, fn)
        for item in items:
            logging.info(item)
            filter_item = Filter(filter_.get_name(), item)

            evol_data = QAForums.get_evolutionary_data(period, startdate,
                                                       enddate, identities_db,
                                                       filter_item)
            fn = os.path.join(
                destdir, filter_item.get_evolutionary_filename(QAForums()))
            createJSON(
                completePeriodIds(evol_data, period, startdate, enddate), fn)

            agg = QAForums.get_agg_data(period, startdate, enddate,
                                        identities_db, filter_item)
            fn = os.path.join(destdir,
                              filter_item.get_static_filename(QAForums()))
            createJSON(agg, fn)
Example #35
0
def GetClosedSummaryCompanies(period, startdate, enddate, identities_db,
                              closed_condition, num_organizations):
    """Evolution of closed issues per company.

    The first `num_organizations` organizations keep an individual series;
    the remaining ones are accumulated into an 'Others' series.
    """
    from vizgrimoire.ITS import ITS

    org_names = DataSource.get_metrics("organizations", ITS).get_list()['name']

    summary = {}
    for rank, company in enumerate(org_names, start=1):
        company_filter = MetricFilters(period, startdate, enddate,
                                       ["company", "'" + company + "'"])
        mclosed = ITS.get_metrics("closed", ITS)
        mclosed.filters = company_filter
        closed = mclosed.get_ts()
        # Rename the generic "closed" series after the company
        closed[company] = closed.pop("closed")

        if rank <= num_organizations:
            # Organizations with an individual series in the dataset
            summary.update(closed)
        elif 'Others' not in summary:
            summary['Others'] = closed[company]
        else:
            summary['Others'] = [x + y for x, y in
                                 zip(summary['Others'], closed[company])]

    return completePeriodIds(summary, period, startdate, enddate)
Example #36
0
    def get_ts(self):
        """Monthly time series of unique visitors.

        Raises:
            ValueError: if the configured period is not 'month', the only
                granularity available in the visits_month table.
        """
        if self.filters.period != 'month':
            msg = 'Period %s not valid. Currently, only "month" is supported' % \
                self.filters.period
            # Bug fix: the exception was previously *returned* instead of
            # raised, handing callers a ValueError instance as if it were
            # time series data.
            raise ValueError(msg)

        fields = Set([])
        tables = Set([])
        filters = Set([])

        fields.add("unique_visitors uvisitors")
        tables.add("visits_month v")

        query = self.db.BuildQuery(self.filters.period, self.filters.startdate,
                                   self.filters.enddate, "v.date",
                                   fields, tables, filters, True,
                                   self.filters.type_analysis)

        ts = self.db.ExecuteQuery(query)
        ts = completePeriodIds(ts, self.filters.period,
                               self.filters.startdate, self.filters.enddate)
        return ts
Example #37
0
    def get_ts(self):
        """Return the time series for this metric.

        The series is a hash with a unixtime date, labels, some other
        fields and the metric's data per period, 0-filled where needed.
        Grouped analyses (a type_analysis with no item selected) are
        converted from a grouped result set into per-item time series.
        """
        ts = self.db.ExecuteQuery(self._get_sql(True))

        filters = self.filters
        grouped = filters.type_analysis and filters.type_analysis[1] is None
        if grouped:
            id_field = self.db.get_group_field_alias(filters.type_analysis[0])
            ts = Metrics._convert_group_to_ts(ts, id_field)
            return Metrics._complete_period_ids_items(ts, id_field, filters.period,
                                                      filters.startdate, filters.enddate)
        return completePeriodIds(ts, filters.period,
                                 filters.startdate, filters.enddate)
Example #38
0
 def get_person_evol(uuid, period, startdate, enddate, identities_db, type_analysis):
     """Evolution of MLS activity for one person, identified by uuid.

     Note: identities_db and type_analysis are accepted but unused here;
     presumably they exist for API symmetry with other data sources --
     TODO confirm against sibling implementations.
     """
     evol = GetEvolPeopleMLS(uuid, period, startdate, enddate)
     evol = completePeriodIds(evol, period, startdate, enddate)
     return evol
        def _get_ts_all():
            """ Get the metrics for all items at the same time """
            # NOTE(review): this nested function relies on closure variables
            # from its enclosing scope -- months, start_month, get_sql() and
            # get_values_median() -- so it cannot be called standalone.

            all_items = self.db.get_all_items(self.filters.type_analysis)
            group_field = self.db.get_group_field(all_items)
            id_field = group_field.split('.')[1] # remove table name

            metrics = ["review_time_pending_reviews",
                       "review_time_pending_days_acc_median",
                       "review_time_pending_upload_reviews",
                       "review_time_pending_upload_days_acc_median",
                       "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
                       "review_time_pending_ReviewsWaitingForReviewer_reviews",
                       "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
                       "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"]

            # NOTE(review): metrics is assigned twice with identical content;
            # the assignment above is dead code.
            metrics = ["review_time_pending_reviews",
                       "review_time_pending_days_acc_median",
                       "review_time_pending_upload_reviews",
                       "review_time_pending_upload_days_acc_median",
                       "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
                       "review_time_pending_ReviewsWaitingForReviewer_reviews",
                       "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
                       "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"]

            acc_pending_time_median_month = {"month":[],"name":[]} # Used to store each month all items data
            acc_pending_time_median = {"month":[]} # Used to store the final format

            for metric in metrics:
                acc_pending_time_median_month[metric] = []
                acc_pending_time_median[metric] = []

            # months = 2 # to debug

            for i in range(0, months+1):
                # Complete the skeletom of the data dict
                acc_pending_time_median_month["month"].append(start_month+i)
                acc_pending_time_median["month"].append(start_month+i)
                acc_pending_time_median_month["name"].append([])
                for metric in metrics:
                    acc_pending_time_median_month[metric].append([])
                    acc_pending_time_median[metric].append([])

            for i in range(0, months+1):
                # First get all data from SQL
                newtime = self.db.ExecuteQuery(get_sql(start_month+i))
                uploadtime = self.db.ExecuteQuery(get_sql(start_month+i, False, True))
                newtime_rev = self.db.ExecuteQuery(get_sql(start_month+i, True))
                # This is the slow query
                uploadtime_rev = self.db.ExecuteQuery(get_sql(start_month+i, True, True))
                # Build a common list for all items
                all_items_month_ids = []
                # for data_sql in [newtime, uploadtime, newtime_rev, uploadtime_rev]:
                for data_sql in [newtime, uploadtime, newtime_rev, uploadtime_rev]:
                    checkListArray(data_sql)
                    all_items_month_ids = list(Set(data_sql[id_field]+all_items_month_ids))
                acc_pending_time_median_month["name"][i] = all_items_month_ids

                # Now add the data in a common dict for all metrics in this month
                # review time
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(newtime[id_field])):
                        if newtime[id_field][j] == item:
                            data_item.append(newtime['newtime'][j])
                    values = get_values_median(data_item)
                    # print start_month+i, "newtime", item, values
                    nreviews = len(data_item)
                    acc_pending_time_median_month['review_time_pending_reviews'][i].append(nreviews)
                    acc_pending_time_median_month['review_time_pending_days_acc_median'][i].append(values)

                # upload time
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(uploadtime[id_field])):
                        if uploadtime[id_field][j] == item:
                            data_item.append(uploadtime['uploadtime'][j])

                    values = get_values_median(data_item)
                    # print start_month+i, "upload", item, values
                    nreviews = len(data_item)
                    acc_pending_time_median_month['review_time_pending_upload_reviews'][i].append(nreviews)
                    acc_pending_time_median_month['review_time_pending_upload_days_acc_median'][i].append(values)
                # review time reviewers
                for item in all_items_month_ids:
                    # Now just for reviews waiting for Reviewer
                    data_item = []
                    for j in range(0, len(newtime_rev[id_field])):
                        if newtime_rev[id_field][j] == item:
                            data_item.append(newtime_rev['newtime'][j])
                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month['review_time_pending_ReviewsWaitingForReviewer_reviews'][i].append(nreviews)
                    acc_pending_time_median_month['review_time_pending_ReviewsWaitingForReviewer_days_acc_median'][i].append(values)

                # upload time reviewers
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(uploadtime_rev[id_field])):
                        if uploadtime_rev[id_field][j] == item:
                            data_item.append(uploadtime_rev['uploadtime'][j])
                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month['review_time_pending_upload_ReviewsWaitingForReviewer_reviews'][i].append(nreviews)
                    acc_pending_time_median_month['review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'][i].append(values)

            # Now we need to consolidate all names in a single list
            all_items = []
            for lnames in acc_pending_time_median_month['name']:
                all_items = list(Set(lnames+all_items))
            # And now time to create the final version that should be completePeriod
            for item in all_items:
                # Add the ts for the item to the final dict
                for i in range(0, months+1):
                    mitems = acc_pending_time_median_month['name'][i]
                    found_item = False
                    for k in range(0, len(mitems)):
                        if mitems[k] == item:
                            # Found the item, get all metrics for this month
                            found_item = True
                            for metric in metrics:
                                item_metric_month_value = acc_pending_time_median_month[metric][i][k]
                                acc_pending_time_median[metric][i].append(item_metric_month_value)
                    if not found_item:
                        for metric in metrics:
                            # 0 reviews, 0 review time
                            acc_pending_time_median[metric][i].append(0)

            # Now we need to completePeriods to add time series fields
            # All the time series are already complete because the way they are built
            # but we miss some time series fields
            ts_fields = ['unixtime','date','month','id']
            ts_aux = {}
            ts_aux['month'] =  acc_pending_time_median['month']
            ts_aux = completePeriodIds(ts_aux, self.filters.period,
                                       self.filters.startdate, self.filters.enddate)
            for field in ts_fields:
                acc_pending_time_median[field] = ts_aux[field]

            # After completing the time series, add the name/url series
            if self.filters.type_analysis[0] != "repository":
                acc_pending_time_median["name"] = all_items
            else:
                acc_pending_time_median["url"] = all_items

            # And now we need to adjust the format from
            # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1,URL2_M1], [URL1_M2, URL2_M2],[URL1_M3...]...]
            # to
            # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1, URL1_M2, URL1_M3],[URL2_M1...]...]
            time_to = {}
            for field in acc_pending_time_median:
                if field not in metrics:
                    time_to[field] = acc_pending_time_median[field]
                else:
                    # The new metric field will have an array per item with the time series
                    time_to[field] = []
                    for i in range(0,len(all_items)):
                        time_to[field].append([])
                    for metrics_month in acc_pending_time_median[field]:
                        for j in range(0,len(all_items)):
                            time_to[field][j].append(metrics_month[j])
            return time_to
    def get_ts(self):
        # Get all reviews pending time for each month and compute the median.
        # Return a list with all the medians for all months

        def get_date_from_month(monthid):
            # month format: year*12+month
            year = (monthid-1) / 12
            month = monthid - year*12
            # We need the last day of the month
            import calendar
            last_day = calendar.monthrange(year, month)[1]
            current = str(year)+"-"+str(month)+"-"+str(last_day)
            return (current)

        # SQL for all, for upload  or for waiting for reviewer reviews
        def get_sql(month, reviewers = False, uploaded = False):
            current = get_date_from_month(month)

            sql_max_patchset = self.db.get_sql_max_patchset_for_reviews (current)
            sql_reviews_reviewed = self.db.get_sql_reviews_reviewed(startdate, current)
            sql_reviews_closed = self.db.get_sql_reviews_closed(startdate, current)

            # List of pending reviews before a date: time from new time and from last upload
            fields  = "TIMESTAMPDIFF(SECOND, submitted_on, '"+current+"')/(24*3600) AS newtime,"
            if (uploaded):
                fields = "TIMESTAMPDIFF(SECOND, ch.changed_on, '"+current+"')/(24*3600) AS uploadtime,"
            fields += " YEAR(i.submitted_on)*12+MONTH(i.submitted_on) as month"

            all_items = self.db.get_all_items(self.filters.type_analysis)
            if all_items:
                group_field = self.db.get_group_field(all_items)
                fields = group_field + ", " + fields

            tables = Set([])
            filters = Set([])

            tables.add("issues i")
            tables.add("people")
            tables.add("issues_ext_gerrit ie")
            if (uploaded):
                tables.add("changes ch")
                tables.add("("+sql_max_patchset+") last_patch")
            tables.union_update(self.db.GetSQLReportFrom(self.filters))
            tables = self.db._get_tables_query(tables)

            filters.add("people.id=i.submitted_by")
            filters.add("ie.issue_id=i.id")
            filters.add("i.id NOT IN ("+ sql_reviews_closed +")")
            if (uploaded):
                filters.add("ch.issue_id = i.id")
                filters.add("i.id = last_patch.issue_id")
                filters.add("ch.old_value = last_patch.maxPatchset")
                filters.add("ch.field = 'Upload'")
            if reviewers:
                filters.add("i.id NOT IN (%s) " % (sql_reviews_reviewed))

            filters.union_update(self.db.GetSQLReportWhere(self.filters,"issues"))
            filters = self.db._get_filters_query(filters)
            # All reviews before the month: accumulated key point
            filters += " HAVING month<=" + str(month)
            # Not include future submissions for current month analysis
            # We should no need it with the actual SQL which is correct
            if (uploaded):
                filters += " AND uploadtime >= 0"
            else:
                filters += " AND newtime >= 0"

            filters += " ORDER BY i.submitted_on"

            q = self.db.GetSQLGlobal('i.submitted_on', fields, tables, filters,
                                     startdate,enddate)
            return q

        def get_values_median(values):
            if not isinstance(values, list): values = [values]
            values = removeDecimals(values)
            if (len(values) == 0): values = float('nan')
            else: values = median(values)
            return values

        def _get_ts_all():
            """ Get the metrics for all items at the same time.

            Month by month, computes pending-review counts and accumulated
            median pending times (since submission and since last upload,
            overall and restricted to reviews waiting for a reviewer), one
            series per item (repository, company, ...), and returns them in
            the per-item time-series format.
            """

            all_items = self.db.get_all_items(self.filters.type_analysis)
            group_field = self.db.get_group_field(all_items)
            id_field = group_field.split('.')[1] # remove table name

            # Names of the per-item, per-month series computed below.
            # (This list was duplicated in the original code; the redundant
            # second assignment has been removed.)
            metrics = ["review_time_pending_reviews",
                       "review_time_pending_days_acc_median",
                       "review_time_pending_upload_reviews",
                       "review_time_pending_upload_days_acc_median",
                       "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
                       "review_time_pending_ReviewsWaitingForReviewer_reviews",
                       "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
                       "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"]

            acc_pending_time_median_month = {"month":[],"name":[]} # Used to store each month all items data
            acc_pending_time_median = {"month":[]} # Used to store the final format

            for metric in metrics:
                acc_pending_time_median_month[metric] = []
                acc_pending_time_median[metric] = []

            # months = 2 # to debug

            for i in range(0, months+1):
                # Complete the skeleton of the data dict
                acc_pending_time_median_month["month"].append(start_month+i)
                acc_pending_time_median["month"].append(start_month+i)
                acc_pending_time_median_month["name"].append([])
                for metric in metrics:
                    acc_pending_time_median_month[metric].append([])
                    acc_pending_time_median[metric].append([])

            for i in range(0, months+1):
                # First get all data from SQL
                newtime = self.db.ExecuteQuery(get_sql(start_month+i))
                uploadtime = self.db.ExecuteQuery(get_sql(start_month+i, False, True))
                newtime_rev = self.db.ExecuteQuery(get_sql(start_month+i, True))
                # This is the slow query
                uploadtime_rev = self.db.ExecuteQuery(get_sql(start_month+i, True, True))
                # Build a common list of item ids seen by any of the queries
                all_items_month_ids = []
                for data_sql in [newtime, uploadtime, newtime_rev, uploadtime_rev]:
                    checkListArray(data_sql)
                    all_items_month_ids = list(Set(data_sql[id_field]+all_items_month_ids))
                acc_pending_time_median_month["name"][i] = all_items_month_ids

                # Now add the data in a common dict for all metrics in this month
                # review time
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(newtime[id_field])):
                        if newtime[id_field][j] == item:
                            data_item.append(newtime['newtime'][j])
                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month['review_time_pending_reviews'][i].append(nreviews)
                    acc_pending_time_median_month['review_time_pending_days_acc_median'][i].append(values)

                # upload time
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(uploadtime[id_field])):
                        if uploadtime[id_field][j] == item:
                            data_item.append(uploadtime['uploadtime'][j])

                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month['review_time_pending_upload_reviews'][i].append(nreviews)
                    acc_pending_time_median_month['review_time_pending_upload_days_acc_median'][i].append(values)
                # review time reviewers
                for item in all_items_month_ids:
                    # Now just for reviews waiting for Reviewer
                    data_item = []
                    for j in range(0, len(newtime_rev[id_field])):
                        if newtime_rev[id_field][j] == item:
                            data_item.append(newtime_rev['newtime'][j])
                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month['review_time_pending_ReviewsWaitingForReviewer_reviews'][i].append(nreviews)
                    acc_pending_time_median_month['review_time_pending_ReviewsWaitingForReviewer_days_acc_median'][i].append(values)

                # upload time reviewers
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(uploadtime_rev[id_field])):
                        if uploadtime_rev[id_field][j] == item:
                            data_item.append(uploadtime_rev['uploadtime'][j])
                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month['review_time_pending_upload_ReviewsWaitingForReviewer_reviews'][i].append(nreviews)
                    acc_pending_time_median_month['review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'][i].append(values)

            # Now we need to consolidate all names in a single list
            all_items = []
            for lnames in acc_pending_time_median_month['name']:
                all_items = list(Set(lnames+all_items))
            # And now time to create the final version that should be completePeriod
            for item in all_items:
                # Add the ts for the item to the final dict
                for i in range(0, months+1):
                    mitems = acc_pending_time_median_month['name'][i]
                    found_item = False
                    for k in range(0, len(mitems)):
                        if mitems[k] == item:
                            # Found the item, get all metrics for this month
                            found_item = True
                            for metric in metrics:
                                item_metric_month_value = acc_pending_time_median_month[metric][i][k]
                                acc_pending_time_median[metric][i].append(item_metric_month_value)
                    if not found_item:
                        for metric in metrics:
                            # 0 reviews, 0 review time
                            acc_pending_time_median[metric][i].append(0)

            # Now we need to completePeriods to add time series fields
            # All the time series are already complete because the way they are built
            # but we miss some time series fields
            ts_fields = ['unixtime','date','month','id']
            ts_aux = {}
            ts_aux['month'] =  acc_pending_time_median['month']
            ts_aux = completePeriodIds(ts_aux, self.filters.period,
                                       self.filters.startdate, self.filters.enddate)
            for field in ts_fields:
                acc_pending_time_median[field] = ts_aux[field]

            # After completing the time series, add the name/url series
            if self.filters.type_analysis[0] != "repository":
                acc_pending_time_median["name"] = all_items
            else:
                acc_pending_time_median["url"] = all_items

            # And now we need to adjust the format from
            # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1,URL2_M1], [URL1_M2, URL2_M2],[URL1_M3...]...]
            # to
            # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1, URL1_M2, URL1_M3],[URL2_M1...]...]
            time_to = {}
            for field in acc_pending_time_median:
                if field not in metrics:
                    time_to[field] = acc_pending_time_median[field]
                else:
                    # The new metric field will have an array per item with the time series
                    time_to[field] = []
                    for i in range(0,len(all_items)):
                        time_to[field].append([])
                    for metrics_month in acc_pending_time_median[field]:
                        for j in range(0,len(all_items)):
                            time_to[field][j].append(metrics_month[j])
            return time_to

        # Shortcuts to the report filters used by the nested helpers above.
        startdate = self.filters.startdate
        enddate = self.filters.enddate
        identities_db = self.db.identities_db
        type_analysis =  self.filters.type_analysis
        period = self.filters.period
        bots = []

        # startdate/enddate arrive quoted ("'YYYY-MM-DD'"), hence the quotes
        # inside the parse format.
        start = datetime.strptime(startdate, "'%Y-%m-%d'")
        end = datetime.strptime(enddate, "'%Y-%m-%d'")

        # This accumulated-pending analysis is only defined month by month.
        if (period != "month"):
            logging.error("Period not supported in " + self.id  + " " + period)
            return None

        # Months encoded as year*12+month, matching get_sql()'s month field.
        start_month = start.year*12 + start.month
        end_month = end.year*12 + end.month
        months = end_month - start_month


        all_items = self.db.get_all_items(self.filters.type_analysis)

        # When analyzing several items at once (e.g. all repositories),
        # delegate to the per-item implementation.
        if all_items:
            return _get_ts_all()

        # Single-item case: one scalar per metric and month.
        acc_pending_time_median = {"month":[],
                                   "review_time_pending_reviews":[],
                                   "review_time_pending_days_acc_median":[],
                                   "review_time_pending_upload_reviews":[],
                                   "review_time_pending_upload_days_acc_median":[],
                                   "review_time_pending_ReviewsWaitingForReviewer_days_acc_median":[],
                                   "review_time_pending_ReviewsWaitingForReviewer_reviews":[],
                                   "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median":[],
                                   "review_time_pending_upload_ReviewsWaitingForReviewer_reviews":[],}

        for i in range(0, months+1):
            acc_pending_time_median['month'].append(start_month+i)

            # Pending reviews: time since submission
            reviews = self.db.ExecuteQuery(get_sql(start_month+i))
            values = get_values_median(reviews['newtime'])
            if isinstance(reviews['newtime'], list): nreviews = len(reviews['newtime'])
            else: nreviews = 1 # sure 1?
            acc_pending_time_median['review_time_pending_reviews'].append(nreviews)
            acc_pending_time_median['review_time_pending_days_acc_median'].append(values)
            # upload time
            reviews = self.db.ExecuteQuery(get_sql(start_month+i, False, True))
            values = get_values_median(reviews['uploadtime'])
            if isinstance(reviews['uploadtime'], list): nreviews = len(reviews['uploadtime'])
            else: nreviews = 1
            acc_pending_time_median['review_time_pending_upload_reviews'].append(nreviews)
            acc_pending_time_median['review_time_pending_upload_days_acc_median'].append(values)

            # Now just for reviews waiting for Reviewer
            reviews = self.db.ExecuteQuery(get_sql(start_month+i, True))
            values = get_values_median(reviews['newtime'])
            if isinstance(reviews['newtime'], list): nreviews = len(reviews['newtime'])
            else: nreviews = 1
            acc_pending_time_median['review_time_pending_ReviewsWaitingForReviewer_reviews'].append(nreviews)
            acc_pending_time_median['review_time_pending_ReviewsWaitingForReviewer_days_acc_median'].append(values)

            # Waiting for reviewer, measured from last upload
            reviews = self.db.ExecuteQuery(get_sql(start_month+i, True, True))
            values = get_values_median(reviews['uploadtime'])
            if isinstance(reviews['uploadtime'], list): nreviews = len(reviews['uploadtime'])
            else: nreviews = 1
            acc_pending_time_median['review_time_pending_upload_ReviewsWaitingForReviewer_reviews'].append(nreviews)
            acc_pending_time_median['review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'].append(values)

        # Normalize values removing NA and converting to 0. Maybe not a good idea.
        for m in acc_pending_time_median.keys():
            for i in range(0,len(acc_pending_time_median[m])):
                acc_pending_time_median[m][i] = float(acc_pending_time_median[m][i])

        return completePeriodIds(acc_pending_time_median, self.filters.period,
                                 self.filters.startdate, self.filters.enddate)
    def result(self, data_source, destdir=None):
        """Compute and write the SCR demographics JSON reports.

        Writes the new/gone contributor summaries, their per-person
        activity time series and the people-intake evolution to
        *destdir*. Does nothing unless *data_source* is SCR and a
        *destdir* is given.
        """
        from vizgrimoire.SCR import SCR
        if data_source != SCR or destdir is None: return

        period = self.filters.period
        startdate = self.filters.startdate
        enddate = self.filters.enddate

        # Contributors that recently joined the community
        code_contrib = {}
        code_contrib["submitters"] = self.GetNewSubmitters()
        code_contrib["mergers"] = self.GetNewMergers()
        code_contrib["abandoners"] = self.GetNewAbandoners()
        createJSON(code_contrib, destdir + "/scr-code-contrib-new.json")

        # Contributors gone from the community
        code_contrib = {}
        code_contrib["submitters"] = self.GetGoneSubmitters()
        code_contrib["mergers"] = self.GetGoneMergers()
        code_contrib["abandoners"] = self.GetGoneAbandoners()
        createJSON(code_contrib, destdir + "/scr-code-contrib-gone.json")

        data = self.GetNewSubmittersActivity()
        evol = {}
        evol['people'] = {}
        for uuid in data['uuid']:
            pdata = self.db.GetPeopleEvolSubmissionsSCR(
                uuid, period, startdate, enddate)
            pdata = completePeriodIds(pdata, period, startdate, enddate)
            evol['people'][uuid] = {"submissions": pdata['submissions']}
            # Just to have the time series data.
            # evol.update() replaces the Python-2-only
            # dict(evol.items() + pdata.items()) idiom; pdata keys
            # override existing ones, exactly as before.
            evol.update(pdata)
        if 'changes' in evol:
            del evol['changes']  # closed (metrics) is included in people
        createJSON(evol,
                   destdir + "/new-people-activity-scr-evolutionary.json")

        data = self.GetGoneSubmittersActivity()
        evol = {}
        evol['people'] = {}
        for uuid in data['uuid']:
            pdata = self.db.GetPeopleEvolSubmissionsSCR(
                uuid, period, startdate, enddate)
            pdata = completePeriodIds(pdata, period, startdate, enddate)
            evol['people'][uuid] = {"submissions": pdata['submissions']}
            # Just to have the time series data
            evol.update(pdata)
        if 'changes' in evol:
            del evol['changes']  # closed (metrics) is included in people
        createJSON(evol,
                   destdir + "/gone-people-activity-scr-evolutionary.json")

        # data = GetPeopleLeaving()
        # createJSON(data, destdir+"/leaving-people-scr.json")

        # Developer intake: people doing their 1st, 1st-5th and 5th-10th
        # review submission in each period.
        evol = {}
        data = completePeriodIds(self.db.GetPeopleIntake(0, 1), period,
                                 startdate, enddate)
        evol[period] = data[period]
        evol['id'] = data['id']
        evol['date'] = data['date']
        evol['num_people_1'] = data['people']
        evol['num_people_1_5'] = completePeriodIds(
            self.db.GetPeopleIntake(1, 5), period, startdate,
            enddate)['people']
        evol['num_people_5_10'] = completePeriodIds(
            self.db.GetPeopleIntake(5, 10), period, startdate,
            enddate)['people']
        createJSON(evol, destdir + "/scr-people-intake-evolutionary.json")
Example #42
0
 def get_person_evol(uuid, period, startdate, enddate, identities_db, type_analysis):
     """Return the evolution of one person's activity, with all period
     ids in [startdate, enddate] filled in."""
     raw_ts = Pullpo.get_people_query(uuid, startdate, enddate, True, period)
     return completePeriodIds(raw_ts, period, startdate, enddate)
Example #43
0
 def get_person_evol(uuid, period, startdate, enddate, identities_db, type_analysis):
     """Per-period activity time series for one person in the releases
     data source."""
     query = ReleasesDS._get_people_sql(uuid, period, startdate, enddate, True)
     ts = ExecuteQuery(query)
     return completePeriodIds(ts, period, startdate, enddate)
Example #44
0
 def get_person_evol(uuid, period, startdate, enddate, identities_db,
                     type_analysis):
     """Evolutionary (per-period) activity series for a single person."""
     person_ts = Pullpo.get_people_query(uuid, startdate, enddate, True,
                                         period)
     person_ts = completePeriodIds(person_ts, period, startdate, enddate)
     return person_ts
Example #45
0
 def get_ts_changes(self):
     """Time series of changes, completed with the configured period ids."""
     ts = self.db.ExecuteQuery(self._get_sqlchanges(True))
     return completePeriodIds(ts, self.filters.period,
                              self.filters.startdate, self.filters.enddate)
Example #46
0
    def get_backlog(self, states, backend_type):
        """Build per-period backlog counts for the given ticket *states*.

        Replays the issue status-change log in chronological order and, at
        the end of every period, snapshots how many tickets currently sit
        in each tracked state. Returns the completePeriodIds time-series
        dict extended with one count list per state.
        """
        import datetime
        import time

        def update_backlog_count(current_count, backlog_count):
            # Snapshot the current per-state counters into the result lists.
            for state, count in current_count.items():
                backlog_count[state].append(count)

        # Dict to store the results
        data = {self.filters.period : [self.filters.startdate, self.filters.enddate]}
        data = completePeriodIds(data, self.filters.period,
                                 self.filters.startdate, self.filters.enddate)

        tickets_states = {}   # last known state of each ticket id
        current_status = {}   # running count of tickets per tracked state

        # Initialize structures
        for state in states:
            current_status[state] = 0
            data[state] = []

        # Request issues log
        query = self.__get_sql_issues_states__(backend_type)
        issues_log = self.db.ExecuteQuery(query)

        # Period boundaries (unix timestamps); the first boundary is dropped
        # because events before it belong to the first snapshot anyway.
        periods = list(data['unixtime'][1:])

        # Add a one period more to avoid problems with
        # data from this period
        last_date = int(time.mktime(datetime.datetime.strptime(
                        self.filters.enddate, "'%Y-%m-%d'").timetuple()))
        periods.append(last_date)

        end_period = int(periods.pop(0))

        for i in range(len(issues_log['issue_id'])):
            issue_id = issues_log['issue_id'][i]
            issue_state = issues_log['status'][i]
            issue_date = int(issues_log['udate'][i])

            # Fill periods without changes on issues states
            while issue_date >= end_period:
                end_period = int(periods.pop(0))
                update_backlog_count(current_status, data)

            if issue_id not in tickets_states:
                # Add ticket to the status dict
                tickets_states[issue_id] = issue_state
            elif tickets_states[issue_id] != issue_state:
                # Decrease the count of tickets with the old state
                # only for predefined states
                old_state = tickets_states[issue_id]
                if old_state in states:
                    current_status[old_state] -= 1

                # Set new status
                tickets_states[issue_id] = issue_state
            else:
                continue # Ignore equal states

            # Increase the count of tickets with issue_state
            # only for predefined states
            if issue_state in states:
                current_status[issue_state] += 1

        # End of the loop. Add the last period values to the backlog count
        update_backlog_count(current_status, data)

        # Fill remaining periods without changes on issues states
        # (repeat the last known count for each one)
        while periods:
            periods.pop(0)
            for state in states:
                data[state].append(data[state][-1])

        return data
    def result(self, data_source, destdir=None):
        """Compute and write the SCM demographics JSON reports.

        Writes new/gone author summaries, their activity time series and
        the people-intake evolution to *destdir*, and returns the same
        data as a dict. Does nothing (returns None) unless *data_source*
        is SCM and a *destdir* is given.
        """
        from vizgrimoire.SCM import SCM
        if data_source != SCM or destdir is None: return

        result_dict = {}

        period = self.filters.period
        startdate = self.filters.startdate
        enddate = self.filters.enddate

        # Authors that recently joined the community
        code_contrib = {}
        code_contrib["authors"] = self.GetNewAuthors()
        result_dict['people_new'] = code_contrib
        createJSON(code_contrib, destdir + "/scm-code-contrib-new.json")

        # Authors gone from the community
        code_contrib = {}
        code_contrib["authors"] = self.GetGoneAuthors()
        result_dict['people_gone'] = code_contrib
        createJSON(code_contrib, destdir + "/scm-code-contrib-gone.json")

        data = self.GetNewAuthorsActivity()

        evol = {}
        evol['people'] = {}
        for uuid in data['uuid']:
            pdata = self.db.GetEvolPeopleSCM(uuid, period, startdate, enddate)
            pdata = completePeriodIds(pdata, period, startdate, enddate)
            evol['people'][uuid] = {"commits": pdata['commits']}
            # Just to have the time series data.
            # evol.update() replaces the Python-2-only
            # dict(evol.items() + pdata.items()) idiom; pdata keys
            # override existing ones, exactly as before.
            evol.update(pdata)

        # NOTE(review): unlike the 'gone' series below, 'changes' is not
        # removed from this dict -- confirm that is intentional.
        result_dict['people_new_ts'] = evol
        createJSON(evol,
                   destdir + "/new-people-activity-scm-evolutionary.json")

        data = self.GetGoneAuthorsActivity()
        evol = {}
        evol['people'] = {}
        for uuid in data['uuid']:
            pdata = self.db.GetEvolPeopleSCM(uuid, period, startdate, enddate)
            pdata = completePeriodIds(pdata, period, startdate, enddate)
            evol['people'][uuid] = {"commits": pdata['commits']}
            # Just to have the time series data
            evol.update(pdata)
        if 'changes' in evol:
            del evol['changes']  # closed (metrics) is included in people
        result_dict['people_gone_ts'] = evol
        createJSON(evol,
                   destdir + "/gone-people-activity-scm-evolutionary.json")

        # data = GetPeopleLeaving()
        # createJSON(data, destdir+"/leaving-people-scr.json")

        # Developer intake: people doing their 1st, 1st-5th and 5th-10th
        # commit in each period.
        evol = {}
        data = completePeriodIds(self.db.GetPeopleIntake(0, 1), period,
                                 startdate, enddate)
        evol[period] = data[period]
        evol['id'] = data['id']
        evol['date'] = data['date']
        evol['num_people_1'] = data['people']
        evol['num_people_1_5'] = completePeriodIds(
            self.db.GetPeopleIntake(1, 5), period, startdate,
            enddate)['people']
        evol['num_people_5_10'] = completePeriodIds(
            self.db.GetPeopleIntake(5, 10), period, startdate,
            enddate)['people']
        result_dict['people_intake_ts'] = evol
        createJSON(evol, destdir + "/scm-people-intake-evolutionary.json")

        return result_dict
Example #48
0
    def get_ts(self):
        # Get all reviews pending time for each month and compute the median.
        # Return a list with all the medians for all months

        def get_date_from_month(monthid):
            """Return the last day of the month encoded as year*12+month.

            The result is a "YYYY-M-D" string (month and day are not
            zero-padded) pointing to the last calendar day of that month.
            """
            # month format: year*12+month
            # Floor division keeps this correct on both Python 2 and 3;
            # the original plain "/" would yield a float under Python 3.
            year = (monthid - 1) // 12
            month = monthid - year * 12
            # We need the last day of the month
            import calendar
            last_day = calendar.monthrange(year, month)[1]
            current = str(year) + "-" + str(month) + "-" + str(last_day)
            return (current)

        # SQL for all, for upload  or for waiting for reviewer reviews
        def get_sql(month, reviewers=False, uploaded=False):
            """Build the SQL listing reviews still pending up to *month*.

            month -- period encoded as year*12+month
            reviewers -- restrict to reviews still waiting for a reviewer
            uploaded -- measure the pending time since the last patchset
                        upload instead of since submission
            """
            current = get_date_from_month(month)

            sql_max_patchset = self.db.get_sql_max_patchset_for_reviews(
                current)
            sql_reviews_reviewed = self.db.get_sql_reviews_reviewed(
                startdate, current)
            sql_reviews_closed = self.db.get_sql_reviews_closed(
                startdate, current)

            # List of pending reviews before a date: time from new time and from last upload
            fields = "TIMESTAMPDIFF(SECOND, submitted_on, '" + current + "')/(24*3600) AS newtime,"
            if (uploaded):
                fields = "TIMESTAMPDIFF(SECOND, ch.changed_on, '" + current + "')/(24*3600) AS uploadtime,"
            fields += " YEAR(i.submitted_on)*12+MONTH(i.submitted_on) as month"

            # When analyzing all items at once, also select the grouping field
            all_items = self.db.get_all_items(self.filters.type_analysis)
            if all_items:
                group_field = self.db.get_group_field(all_items)
                fields = group_field + ", " + fields

            tables = Set([])
            filters = Set([])

            tables.add("issues i")
            tables.add("people")
            tables.add("issues_ext_gerrit ie")
            if (uploaded):
                # Join against the last patchset upload event of each review
                tables.add("changes ch")
                tables.add("(" + sql_max_patchset + ") last_patch")
            tables.union_update(self.db.GetSQLReportFrom(self.filters))
            tables = self.db._get_tables_query(tables)

            filters.add("people.id=i.submitted_by")
            filters.add("ie.issue_id=i.id")
            # Pending = not closed at the snapshot date
            filters.add("i.id NOT IN (" + sql_reviews_closed + ")")
            if (uploaded):
                filters.add("ch.issue_id = i.id")
                filters.add("i.id = last_patch.issue_id")
                filters.add("ch.old_value = last_patch.maxPatchset")
                filters.add("ch.field = 'Upload'")
            if reviewers:
                filters.add("i.id NOT IN (%s) " % (sql_reviews_reviewed))

            filters.union_update(
                self.db.GetSQLReportWhere(self.filters, "issues"))
            filters = self.db._get_filters_query(filters)
            # All reviews before the month: accumulated key point
            filters += " HAVING month<=" + str(month)
            # Not include future submissions for current month analysis
            # We should no need it with the actual SQL which is correct
            if (uploaded):
                filters += " AND uploadtime >= 0"
            else:
                filters += " AND newtime >= 0"

            filters += " ORDER BY i.submitted_on"

            q = self.db.GetSQLGlobal('i.submitted_on', fields, tables, filters,
                                     startdate, enddate)
            return q

        def get_values_median(values):
            """Median of *values* (a scalar or a list); 0.0 for an empty
            sample. Decimal values coming from the DB are converted first."""
            if not isinstance(values, list): values = [values]
            values = removeDecimals(values)
            # 0.0 replaces the original float('0'), which parsed a string
            # just to produce the same constant.
            if (len(values) == 0): values = 0.0
            else: values = median(values)
            return values

        def _get_ts_all():
            """ Get the metrics for all items at the same time """

            all_items = self.db.get_all_items(self.filters.type_analysis)
            group_field = self.db.get_group_field(all_items)
            id_field = group_field.split('.')[1]  # remove table name

            metrics = [
                "review_time_pending_reviews",
                "review_time_pending_days_acc_median",
                "review_time_pending_upload_reviews",
                "review_time_pending_upload_days_acc_median",
                "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
                "review_time_pending_ReviewsWaitingForReviewer_reviews",
                "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
                "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"
            ]

            metrics = [
                "review_time_pending_reviews",
                "review_time_pending_days_acc_median",
                "review_time_pending_upload_reviews",
                "review_time_pending_upload_days_acc_median",
                "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
                "review_time_pending_ReviewsWaitingForReviewer_reviews",
                "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
                "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"
            ]

            acc_pending_time_median_month = {
                "month": [],
                "name": []
            }  # Used to store each month all items data
            acc_pending_time_median = {
                "month": []
            }  # Used to store the final format

            for metric in metrics:
                acc_pending_time_median_month[metric] = []
                acc_pending_time_median[metric] = []

            # months = 2 # to debug

            for i in range(0, months + 1):
                # Complete the skeletom of the data dict
                acc_pending_time_median_month["month"].append(start_month + i)
                acc_pending_time_median["month"].append(start_month + i)
                acc_pending_time_median_month["name"].append([])
                for metric in metrics:
                    acc_pending_time_median_month[metric].append([])
                    acc_pending_time_median[metric].append([])

            for i in range(0, months + 1):
                # First get all data from SQL
                newtime = self.db.ExecuteQuery(get_sql(start_month + i))
                uploadtime = self.db.ExecuteQuery(
                    get_sql(start_month + i, False, True))
                newtime_rev = self.db.ExecuteQuery(
                    get_sql(start_month + i, True))
                # This is the slow query
                uploadtime_rev = self.db.ExecuteQuery(
                    get_sql(start_month + i, True, True))
                # Build a common list for all items
                all_items_month_ids = []
                # for data_sql in [newtime, uploadtime, newtime_rev, uploadtime_rev]:
                for data_sql in [
                        newtime, uploadtime, newtime_rev, uploadtime_rev
                ]:
                    checkListArray(data_sql)
                    all_items_month_ids = list(
                        Set(data_sql[id_field] + all_items_month_ids))
                acc_pending_time_median_month["name"][i] = all_items_month_ids

                # Now add the data in a common dict for all metrics in this month
                # review time
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(newtime[id_field])):
                        if newtime[id_field][j] == item:
                            data_item.append(newtime['newtime'][j])
                    values = get_values_median(data_item)
                    # print start_month+i, "newtime", item, values
                    nreviews = len(data_item)
                    acc_pending_time_median_month[
                        'review_time_pending_reviews'][i].append(nreviews)
                    acc_pending_time_median_month[
                        'review_time_pending_days_acc_median'][i].append(
                            values)

                # upload time
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(uploadtime[id_field])):
                        if uploadtime[id_field][j] == item:
                            data_item.append(uploadtime['uploadtime'][j])

                    values = get_values_median(data_item)
                    # print start_month+i, "upload", item, values
                    nreviews = len(data_item)
                    acc_pending_time_median_month[
                        'review_time_pending_upload_reviews'][i].append(
                            nreviews)
                    acc_pending_time_median_month[
                        'review_time_pending_upload_days_acc_median'][
                            i].append(values)
                # review time reviewers
                for item in all_items_month_ids:
                    # Now just for reviews waiting for Reviewer
                    data_item = []
                    for j in range(0, len(newtime_rev[id_field])):
                        if newtime_rev[id_field][j] == item:
                            data_item.append(newtime_rev['newtime'][j])
                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month[
                        'review_time_pending_ReviewsWaitingForReviewer_reviews'][
                            i].append(nreviews)
                    acc_pending_time_median_month[
                        'review_time_pending_ReviewsWaitingForReviewer_days_acc_median'][
                            i].append(values)

                # upload time reviewers
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(uploadtime_rev[id_field])):
                        if uploadtime_rev[id_field][j] == item:
                            data_item.append(uploadtime_rev['uploadtime'][j])
                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month[
                        'review_time_pending_upload_ReviewsWaitingForReviewer_reviews'][
                            i].append(nreviews)
                    acc_pending_time_median_month[
                        'review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'][
                            i].append(values)

            # Now we need to consolidate all names in a single list
            all_items = []
            for lnames in acc_pending_time_median_month['name']:
                all_items = list(Set(lnames + all_items))
            # And now time to create the final version that should be completePeriod
            for item in all_items:
                # Add the ts for the item to the final dict
                for i in range(0, months + 1):
                    mitems = acc_pending_time_median_month['name'][i]
                    found_item = False
                    for k in range(0, len(mitems)):
                        if mitems[k] == item:
                            # Found the item, get all metrics for this month
                            found_item = True
                            for metric in metrics:
                                item_metric_month_value = acc_pending_time_median_month[
                                    metric][i][k]
                                acc_pending_time_median[metric][i].append(
                                    item_metric_month_value)
                    if not found_item:
                        for metric in metrics:
                            # 0 reviews, 0 review time
                            acc_pending_time_median[metric][i].append(0)

            # Now we need to completePeriods to add time series fields
            # All the time series are already complete because the way they are built
            # but we miss some time series fields
            ts_fields = ['unixtime', 'date', 'month', 'id']
            ts_aux = {}
            ts_aux['month'] = acc_pending_time_median['month']
            ts_aux = completePeriodIds(ts_aux, self.filters.period,
                                       self.filters.startdate,
                                       self.filters.enddate)
            for field in ts_fields:
                acc_pending_time_median[field] = ts_aux[field]

            # After completing the time series, add the name/url series
            if self.filters.type_analysis[0] != "repository":
                acc_pending_time_median["name"] = all_items
            else:
                acc_pending_time_median["url"] = all_items

            # And now we need to adjust the format from
            # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1,URL2_M1], [URL1_M2, URL2_M2],[URL1_M3...]...]
            # to
            # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1, URL1_M2, URL1_M3],[URL2_M1...]...]
            time_to = {}
            for field in acc_pending_time_median:
                if field not in metrics:
                    time_to[field] = acc_pending_time_median[field]
                else:
                    # The new metric field will have an array per item with the time series
                    time_to[field] = []
                    for i in range(0, len(all_items)):
                        time_to[field].append([])
                    for metrics_month in acc_pending_time_median[field]:
                        for j in range(0, len(all_items)):
                            time_to[field][j].append(metrics_month[j])
            return time_to

        startdate = self.filters.startdate
        enddate = self.filters.enddate
        identities_db = self.db.identities_db
        type_analysis = self.filters.type_analysis
        period = self.filters.period
        bots = []

        start = datetime.strptime(startdate, "'%Y-%m-%d'")
        end = datetime.strptime(enddate, "'%Y-%m-%d'")

        if (period != "month"):
            logging.error("Period not supported in " + self.id + " " + period)
            return None

        start_month = start.year * 12 + start.month
        end_month = end.year * 12 + end.month
        months = end_month - start_month

        all_items = self.db.get_all_items(self.filters.type_analysis)

        if all_items:
            return _get_ts_all()

        acc_pending_time_median = {
            "month": [],
            "review_time_pending_reviews": [],
            "review_time_pending_days_acc_median": [],
            "review_time_pending_upload_reviews": [],
            "review_time_pending_upload_days_acc_median": [],
            "review_time_pending_ReviewsWaitingForReviewer_days_acc_median":
            [],
            "review_time_pending_ReviewsWaitingForReviewer_reviews": [],
            "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median":
            [],
            "review_time_pending_upload_ReviewsWaitingForReviewer_reviews": [],
        }

        for i in range(0, months + 1):
            acc_pending_time_median['month'].append(start_month + i)

            reviews = self.db.ExecuteQuery(get_sql(start_month + i))
            values = get_values_median(reviews['newtime'])
            if isinstance(reviews['newtime'], list):
                nreviews = len(reviews['newtime'])
            else:
                nreviews = 1  # sure 1?
            acc_pending_time_median['review_time_pending_reviews'].append(
                nreviews)
            acc_pending_time_median[
                'review_time_pending_days_acc_median'].append(values)
            # upload time
            reviews = self.db.ExecuteQuery(
                get_sql(start_month + i, False, True))
            values = get_values_median(reviews['uploadtime'])
            if isinstance(reviews['uploadtime'], list):
                nreviews = len(reviews['uploadtime'])
            else:
                nreviews = 1
            acc_pending_time_median[
                'review_time_pending_upload_reviews'].append(nreviews)
            acc_pending_time_median[
                'review_time_pending_upload_days_acc_median'].append(values)

            # Now just for reviews waiting for Reviewer
            reviews = self.db.ExecuteQuery(get_sql(start_month + i, True))
            values = get_values_median(reviews['newtime'])
            if isinstance(reviews['newtime'], list):
                nreviews = len(reviews['newtime'])
            else:
                nreviews = 1
            acc_pending_time_median[
                'review_time_pending_ReviewsWaitingForReviewer_reviews'].append(
                    nreviews)
            acc_pending_time_median[
                'review_time_pending_ReviewsWaitingForReviewer_days_acc_median'].append(
                    values)

            reviews = self.db.ExecuteQuery(get_sql(start_month + i, True,
                                                   True))
            values = get_values_median(reviews['uploadtime'])
            if isinstance(reviews['uploadtime'], list):
                nreviews = len(reviews['uploadtime'])
            else:
                nreviews = 1
            acc_pending_time_median[
                'review_time_pending_upload_ReviewsWaitingForReviewer_reviews'].append(
                    nreviews)
            acc_pending_time_median[
                'review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'].append(
                    values)

        # Normalize values removing NA and converting to 0. Maybe not a good idea.
        for m in acc_pending_time_median.keys():
            for i in range(0, len(acc_pending_time_median[m])):
                acc_pending_time_median[m][i] = float(
                    acc_pending_time_median[m][i])

        return completePeriodIds(acc_pending_time_median, self.filters.period,
                                 self.filters.startdate, self.filters.enddate)
Example #49
0
        def _get_ts_all():
            """ Get the metrics for all items at the same time.

            For each month in the analyzed range, queries pending reviews
            (new time and upload time, both for all pending reviews and
            for the subset waiting for a reviewer), computes per-item
            review counts and accumulated median pending times, and then
            reshapes the result so each metric holds one time series per
            item (repository, company, ...).

            Relies on names from the enclosing scope: self, months,
            start_month, get_sql and get_values_median.

            Fix: the original defined the `metrics` list twice with
            identical contents; the dead first assignment was removed.
            """

            all_items = self.db.get_all_items(self.filters.type_analysis)
            group_field = self.db.get_group_field(all_items)
            id_field = group_field.split('.')[1]  # remove table name

            # Series computed for every item each month
            metrics = [
                "review_time_pending_reviews",
                "review_time_pending_days_acc_median",
                "review_time_pending_upload_reviews",
                "review_time_pending_upload_days_acc_median",
                "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
                "review_time_pending_ReviewsWaitingForReviewer_reviews",
                "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
                "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"
            ]

            acc_pending_time_median_month = {
                "month": [],
                "name": []
            }  # Used to store each month all items data
            acc_pending_time_median = {
                "month": []
            }  # Used to store the final format

            for metric in metrics:
                acc_pending_time_median_month[metric] = []
                acc_pending_time_median[metric] = []

            # months = 2 # to debug

            for i in range(0, months + 1):
                # Complete the skeleton of the data dict
                acc_pending_time_median_month["month"].append(start_month + i)
                acc_pending_time_median["month"].append(start_month + i)
                acc_pending_time_median_month["name"].append([])
                for metric in metrics:
                    acc_pending_time_median_month[metric].append([])
                    acc_pending_time_median[metric].append([])

            for i in range(0, months + 1):
                # First get all data from SQL
                newtime = self.db.ExecuteQuery(get_sql(start_month + i))
                uploadtime = self.db.ExecuteQuery(
                    get_sql(start_month + i, False, True))
                newtime_rev = self.db.ExecuteQuery(
                    get_sql(start_month + i, True))
                # This is the slow query
                uploadtime_rev = self.db.ExecuteQuery(
                    get_sql(start_month + i, True, True))
                # Build a common list of item ids seen this month in any query
                all_items_month_ids = []
                for data_sql in [
                        newtime, uploadtime, newtime_rev, uploadtime_rev
                ]:
                    checkListArray(data_sql)
                    all_items_month_ids = list(
                        Set(data_sql[id_field] + all_items_month_ids))
                acc_pending_time_median_month["name"][i] = all_items_month_ids

                # Now add the data in a common dict for all metrics in this month
                # review time
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(newtime[id_field])):
                        if newtime[id_field][j] == item:
                            data_item.append(newtime['newtime'][j])
                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month[
                        'review_time_pending_reviews'][i].append(nreviews)
                    acc_pending_time_median_month[
                        'review_time_pending_days_acc_median'][i].append(
                            values)

                # upload time
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(uploadtime[id_field])):
                        if uploadtime[id_field][j] == item:
                            data_item.append(uploadtime['uploadtime'][j])

                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month[
                        'review_time_pending_upload_reviews'][i].append(
                            nreviews)
                    acc_pending_time_median_month[
                        'review_time_pending_upload_days_acc_median'][
                            i].append(values)
                # review time reviewers
                for item in all_items_month_ids:
                    # Now just for reviews waiting for Reviewer
                    data_item = []
                    for j in range(0, len(newtime_rev[id_field])):
                        if newtime_rev[id_field][j] == item:
                            data_item.append(newtime_rev['newtime'][j])
                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month[
                        'review_time_pending_ReviewsWaitingForReviewer_reviews'][
                            i].append(nreviews)
                    acc_pending_time_median_month[
                        'review_time_pending_ReviewsWaitingForReviewer_days_acc_median'][
                            i].append(values)

                # upload time reviewers
                for item in all_items_month_ids:
                    data_item = []
                    for j in range(0, len(uploadtime_rev[id_field])):
                        if uploadtime_rev[id_field][j] == item:
                            data_item.append(uploadtime_rev['uploadtime'][j])
                    values = get_values_median(data_item)
                    nreviews = len(data_item)
                    acc_pending_time_median_month[
                        'review_time_pending_upload_ReviewsWaitingForReviewer_reviews'][
                            i].append(nreviews)
                    acc_pending_time_median_month[
                        'review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'][
                            i].append(values)

            # Now we need to consolidate all names in a single list
            all_items = []
            for lnames in acc_pending_time_median_month['name']:
                all_items = list(Set(lnames + all_items))
            # And now time to create the final version that should be completePeriod
            for item in all_items:
                # Add the ts for the item to the final dict
                for i in range(0, months + 1):
                    mitems = acc_pending_time_median_month['name'][i]
                    found_item = False
                    for k in range(0, len(mitems)):
                        if mitems[k] == item:
                            # Found the item, get all metrics for this month
                            found_item = True
                            for metric in metrics:
                                item_metric_month_value = acc_pending_time_median_month[
                                    metric][i][k]
                                acc_pending_time_median[metric][i].append(
                                    item_metric_month_value)
                    if not found_item:
                        for metric in metrics:
                            # 0 reviews, 0 review time
                            acc_pending_time_median[metric][i].append(0)

            # Now we need to completePeriods to add time series fields
            # All the time series are already complete because the way they are built
            # but we miss some time series fields
            ts_fields = ['unixtime', 'date', 'month', 'id']
            ts_aux = {}
            ts_aux['month'] = acc_pending_time_median['month']
            ts_aux = completePeriodIds(ts_aux, self.filters.period,
                                       self.filters.startdate,
                                       self.filters.enddate)
            for field in ts_fields:
                acc_pending_time_median[field] = ts_aux[field]

            # After completing the time series, add the name/url series
            if self.filters.type_analysis[0] != "repository":
                acc_pending_time_median["name"] = all_items
            else:
                acc_pending_time_median["url"] = all_items

            # And now we need to adjust the format from
            # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1,URL2_M1], [URL1_M2, URL2_M2],[URL1_M3...]...]
            # to
            # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1, URL1_M2, URL1_M3],[URL2_M1...]...]
            time_to = {}
            for field in acc_pending_time_median:
                if field not in metrics:
                    time_to[field] = acc_pending_time_median[field]
                else:
                    # The new metric field will have an array per item with the time series
                    time_to[field] = []
                    for i in range(0, len(all_items)):
                        time_to[field].append([])
                    for metrics_month in acc_pending_time_median[field]:
                        for j in range(0, len(all_items)):
                            time_to[field][j].append(metrics_month[j])
            return time_to
Example #50
0
    def get_person_evol(cls, uuid, period, startdate, enddate, identities_db, type_analysis):
        """Return the evolutionary ITS activity time series for one person.

        The raw per-period data is fetched with the data source's closed
        condition applied, then padded so every period in the date range
        is present in the series.
        """
        raw_ts = GetPeopleEvolITS(uuid, period, startdate, enddate,
                                  cls._get_closed_condition())
        return completePeriodIds(raw_ts, period, startdate, enddate)
Example #51
0
 def get_person_evol(uuid, period, startdate, enddate, identities_db,
                     type_analysis):
     """Evolution of one person's releases activity, completed per period."""
     people_query = ReleasesDS._get_people_sql(uuid, period, startdate,
                                               enddate, True)
     person_ts = ExecuteQuery(people_query)
     return completePeriodIds(person_ts, period, startdate, enddate)
Example #52
0
 def get_person_evol(uuid, period, startdate, enddate, identities_db,
                     type_analysis):
     """Evolution of one person's IRC activity, with all periods filled in."""
     person_ts = completePeriodIds(
         GetEvolPeopleIRC(uuid, period, startdate, enddate),
         period, startdate, enddate)
     return person_ts
    def result(self, data_source, destdir = None):
        """Build and dump the new/gone-people SCM study results.

        Only runs for the SCM data source and when a destination
        directory is given. Writes several JSON files to destdir and
        returns a dict with the same data:
        people_new / people_gone (author lists), people_new_ts /
        people_gone_ts (per-person commit time series) and
        people_intake_ts (people bucketed by activity level).
        """
        from vizgrimoire.SCM import SCM
        if data_source != SCM or destdir is None: return

        result_dict = {}

        period = self.filters.period
        startdate = self.filters.startdate
        enddate = self.filters.enddate

        # Authors that recently joined the project
        code_contrib = {}
        code_contrib["authors"] = self.GetNewAuthors()
        result_dict['people_new'] = code_contrib
        createJSON(code_contrib, destdir+"/scm-code-contrib-new.json")

        # Authors that stopped contributing
        code_contrib = {}
        code_contrib["authors"] = self.GetGoneAuthors()
        result_dict['people_gone'] = code_contrib
        createJSON(code_contrib, destdir+"/scm-code-contrib-gone.json")

        data = self.GetNewAuthorsActivity()

        # Per-person commit time series for new authors
        evol = {}
        evol['people'] = {}
        for uuid in data['uuid']:
            pdata = self.db.GetEvolPeopleSCM(uuid, period, startdate, enddate)
            pdata = completePeriodIds(pdata, period, startdate, enddate)
            evol['people'][uuid] = {"commits":pdata['commits']}
            # Just to have the time series data
            # NOTE: dict(a.items() + b.items()) is Python 2 only; pdata's
            # period fields overwrite evol's on key collision.
            evol = dict(evol.items() + pdata.items())

        result_dict['people_new_ts'] = evol
        createJSON(evol, destdir+"/new-people-activity-scm-evolutionary.json")

        # Same per-person series, now for authors that left
        data = self.GetGoneAuthorsActivity()
        evol = {}
        evol['people'] = {}
        for uuid in data['uuid']:
            pdata = self.db.GetEvolPeopleSCM(uuid, period, startdate, enddate)
            pdata = completePeriodIds(pdata, period, startdate, enddate)
            evol['people'][uuid] = {"commits":pdata['commits']}
            # Just to have the time series data
            evol = dict(evol.items() + pdata.items())
        if 'changes' in evol:
            del evol['changes'] # closed (metrics) is included in people
        result_dict['people_gone_ts'] = evol
        createJSON(evol, destdir+"/gone-people-activity-scm-evolutionary.json")

        # data = GetPeopleLeaving()
        # createJSON(data, destdir+"/leaving-people-scr.json")

        # People intake buckets: the (low, high) args to GetPeopleIntake
        # presumably bound a commit-activity range — TODO confirm.
        evol = {}
        data = completePeriodIds(self.db.GetPeopleIntake(0,1), period, startdate, enddate)
        evol[period] = data[period]
        evol['id'] = data['id']
        evol['date'] = data['date']
        evol['num_people_1'] = data['people']
        evol['num_people_1_5'] = completePeriodIds(self.db.GetPeopleIntake(1,5),period, startdate, enddate)['people']
        evol['num_people_5_10'] = completePeriodIds(self.db.GetPeopleIntake(5,10), period, startdate, enddate)['people']
        result_dict['people_intake_ts'] = evol
        createJSON(evol, destdir+"/scm-people-intake-evolutionary.json")

        return result_dict
Example #54
0
 def get_ts_changes(self):
     """Time series of changes, with every period in the range present."""
     changes_ts = self.db.ExecuteQuery(self._get_sqlchanges(True))
     filters = self.filters
     return completePeriodIds(changes_ts, filters.period,
                              filters.startdate, filters.enddate)
Example #55
0
    def get_backlog(self, states, backend_type):
        """Compute the backlog time series: per period, how many tickets
        sit in each of the given states.

        Replays the issue status-change log chronologically, keeping a
        running count per state, and snapshots those counts at every
        period boundary of the configured date range.

        :param states: list of state names to track (other states are
                       ignored in the counts)
        :param backend_type: passed to the SQL builder to select the
                             tracker backend
        :returns: the completePeriodIds dict extended with one list of
                  counts per requested state
        """
        import datetime
        import time

        def update_backlog_count(current_count, backlog_count):
            # Snapshot the running per-state counts into the result lists
            for state, count in current_count.items():
                backlog_count[state].append(count)

        # Dict to store the results
        data = {self.filters.period : [self.filters.startdate, self.filters.enddate]}
        data = completePeriodIds(data, self.filters.period,
                                 self.filters.startdate, self.filters.enddate)

        tickets_states = {}   # last known state per issue id
        current_status = {}   # running count of tracked states

        # Initialize structures
        for state in states:
            current_status[state] = 0
            data[state] = []

        # Request issues log
        # NOTE(review): assumes the query returns rows ordered by change
        # date ascending — the replay below depends on it; confirm in
        # __get_sql_issues_states__.
        query = self.__get_sql_issues_states__(backend_type)
        issues_log = self.db.ExecuteQuery(query)

        # Period boundaries (unix timestamps); skip the first, we count
        # changes *up to* each boundary
        periods = list(data['unixtime'][1:])

        # Add a one period more to avoid problems with
        # data from this period
        last_date = int(time.mktime(datetime.datetime.strptime(
                        self.filters.enddate, "'%Y-%m-%d'").timetuple()))
        periods.append(last_date)

        end_period = int(periods.pop(0))

        for i in range(len(issues_log['issue_id'])):
            issue_id = issues_log['issue_id'][i]
            issue_state = issues_log['status'][i]
            issue_date = int(issues_log['udate'][i])

            # Fill periods without changes on issues states
            # NOTE(review): pops from `periods` without a guard — a change
            # dated past the last boundary would raise IndexError; presumably
            # the query filters by enddate. TODO confirm.
            while issue_date >= end_period:
                end_period = int(periods.pop(0))
                update_backlog_count(current_status, data)

            if issue_id not in tickets_states:
                # Add ticket to the status dict
                tickets_states[issue_id] = issue_state
            elif tickets_states[issue_id] != issue_state:
                # Decrease the count of tickets with the old state
                # only for predefined states
                old_state = tickets_states[issue_id]
                if old_state in states:
                    current_status[old_state] -= 1

                # Set new status
                tickets_states[issue_id] = issue_state
            else:
                continue # Ignore equal states

            # Increase the count of tickets with issue_state
            # only for predefined states
            if issue_state in states:
                current_status[issue_state] += 1

        # End of the loop. Add the last period values to the backlog count
        update_backlog_count(current_status, data)

        # Fill remaining periods without changes on issues states
        # (repeat the last snapshot for trailing empty periods)
        while periods:
            periods.pop(0)
            for state in states:
                data[state].append(data[state][-1])

        return data