def calculate_poc_ideal_rate_line(self):
        """Build the ideal percentage-of-completion (poc) rate line.

        The realised poc rate line is extended with a constant daily rate that closes
        the remaining distance to the maximum value. Three situations are handled:

        * work left and time left: the remaining work is divided over the days left
          until the end of the target line, minus the holidays counted in that range;
        * work left but no time left: the remaining work is divided by seven and the
          extension runs until seven days from now;
        * no work left: the realised line is used as is.

        Afterwards the holiday periods (sliced with the realised domain) are added.

        Returns:
            The ideal rate line, named "poc_ideal_indicator", with max_value set to
            the total units of the phase.
        """
        realised_line = self.calculate_poc_real_rate_line()
        target_line = self.calculate_target_rate_line()
        units_to_go = realised_line.distance_to_max_value()
        days_to_go = realised_line.daysleft(end=target_line.domain.end)

        work_remaining = units_to_go > 0
        time_remaining = days_to_go > 0

        # Piece that gets appended to the realised line; stays None when nothing is left to do.
        extension = None
        if work_remaining and time_remaining:
            # Normal case: spread the remaining work over the non-holiday days
            # between the end of the realised line and the target deadline.
            full_span = DateDomain(
                begin=realised_line.domain.end, end=target_line.domain.end
            )
            n_holidays = self.count_holidays_in_date_range(
                self.holiday_periods, full_span.domain
            )
            working_span = DateDomain(
                begin=realised_line.domain.end,
                end=target_line.domain.end - timedelta(n_holidays),
            )
            required_rate = units_to_go / (days_to_go - n_holidays)
            extension = TimeseriesLine(data=required_rate, domain=working_span)
        elif work_remaining and days_to_go <= 0:
            # Deadline already passed: production needs to finish within a week.
            catch_up_rate = units_to_go / 7
            catch_up_span = DateDomain(
                begin=realised_line.domain.end,
                end=pd.Timestamp.now() + timedelta(7),
            )
            extension = TimeseriesLine(data=catch_up_rate, domain=catch_up_span)

        if extension is None:
            # No more work to do, so the ideal line equals the realised line.
            line = realised_line
        else:
            line = realised_line.append(extension, skip=1)

        remaining_holidays = self.slice_holiday_periods(
            holiday_periods=self.holiday_periods,
            periods_to_remove=realised_line.domain.domain,
        )
        line = self.add_holiday_periods_to_line(line, remaining_holidays)
        line.name = "poc_ideal_indicator"
        line.max_value = self.phase_data["total_units"]
        return line
    def perform(self):
        """
        Count orders per project, afsluitcode and afsluitdatum and convert every
        (project, afsluitcode) combination into a record.

        Returns: RecordList with one record per project / afsluitcode combination.

        """
        counts = self.aggregate(
            df=self.apply_business_rules(),
            by=["project", "afsluitcode", "afsluitdatum"],
            agg_function={"order_nummer": "count"},
        )

        records = RecordList()
        for project in counts.index.get_level_values(level=0).unique():
            # After selecting the project, afsluitcode is the first index level.
            counts_of_project = counts.loc[project]
            codes = counts_of_project.index.get_level_values(level=0).unique()
            for afsluitcode in codes:
                order_counts = counts_of_project.loc[afsluitcode]["order_nummer"]
                line_project = TimeseriesLine(
                    data=pd.Series(order_counts),
                    name=self.graph_name,
                    max_value=None,
                    project=project,
                )
                records.append(self.to_record(line_project, afsluitcode))

        return records
    def _make_extrapolated_line(self, realisation_rate_line, mean_rate, total_amount):
        """Create the constant-rate line that continues a realised line up to the total amount.

        Args:
            realisation_rate_line: realised timeseries line; its domain end is the start of
                the extrapolation and its integrated most recent point is the amount done.
            mean_rate: the average speed used for the extrapolation.
            total_amount: the total amount to be realised in the project.

        Returns:
            TimeseriesLine with a constant (slightly corrected) rate, starting at the end
            of the realised line.
        """
        begin = realisation_rate_line.domain.end
        realised_amount = realisation_rate_line.integrate().get_most_recent_point()
        remaining_amount = total_amount - realised_amount
        n_days = remaining_amount / mean_rate

        if n_days >= 1:
            # Use whole days and raise the rate a little, so that the predicted
            # amount equals the total amount on the last day.
            n_days_int = math.floor(n_days)
            mean_rate_corrected = (
                mean_rate + (n_days - n_days_int) * mean_rate / n_days_int
            )
        else:
            # Less than one day of work left: a single day with a scaled rate.
            n_days_int = 1
            mean_rate_corrected = mean_rate * n_days

        return TimeseriesLine(
            data=mean_rate_corrected,
            domain=DateDomainRange(begin=begin, n_days=n_days_int),
        )
    def calculate_poc_verwacht_rate_line(self):
        """Build the expected ("verwacht") percentage-of-completion (poc) rate line.

        The realised poc rate line is extended with a constant daily rate taken from an
        extrapolation of the integrated realised line (data_partition=0.5). When that
        slope is zero, the performance norm of the phase is used instead. Holiday
        periods (sliced with the realised domain) are added to the result.

        Returns:
            The expected rate line, named "poc_verwacht_indicator", with max_value set
            to the total units of the phase.
        """
        realised_line = self.calculate_poc_real_rate_line()

        expected_rate = (
            realised_line.integrate().extrapolate(data_partition=0.5).slope
        )
        if expected_rate == 0:
            # Not enough realised data points: fall back to the ideal speed.
            expected_rate = self.phase_data["performance_norm_unit"]

        units_to_go = realised_line.distance_to_max_value()
        days_to_go = realised_line.daysleft(slope=expected_rate)

        # Without remaining work the expected line equals the realised line.
        line = realised_line
        if units_to_go > 0:
            extension = TimeseriesLine(
                data=expected_rate,
                domain=DateDomainRange(
                    begin=realised_line.domain.end, n_days=days_to_go
                ),
            )
            line = realised_line.append(extension, skip=1)

        remaining_holidays = self.slice_holiday_periods(
            self.holiday_periods, realised_line.domain.domain
        )
        line = self.add_holiday_periods_to_line(line, remaining_holidays)
        line.name = "poc_verwacht_indicator"
        line.max_value = self.phase_data["total_units"]
        return line
Exemple #5
0
    def create_line(value):
        """
        Creates a TimeseriesLine from a single data point, dated today.

        Args:
            value: value to be made into a TimeseriesLine

        Returns: a TimeseriesLine whose domain begins and ends today, holding the value

        """
        # pd.datetime was removed from pandas; pd.Timestamp is the supported equivalent.
        # The timestamp is taken once so that begin and end are guaranteed to be equal.
        today = pd.Timestamp.today()
        domain = DateDomain(today, today)
        return TimeseriesLine(domain=domain, data=value)
    def calculate_poc_real_rate_line(self):
        """Build the realised percentage-of-completion (poc) rate line.

        The data is the column of self.df named by phase_data["phase_column"].

        Returns:
            TimeseriesLine named "poc_real_indicator" with max_value set to the total
            units of the phase.
        """
        phase = self.phase_data
        realised_data = self.df[phase["phase_column"]]
        return TimeseriesLine(
            data=realised_data,
            name="poc_real_indicator",
            max_value=phase["total_units"],
        )
Exemple #7
0
    def _make_project_line(self, project, df):
        """
        Builds the TimeseriesLine of a single project.

        Args:
            project (str): project name
            df (pd.DataFrame): planning data of the project; its first index level is
                dropped before the line is made

        Returns: TimeseriesLine named after this indicator, tagged with the project

        """
        planning = df.droplevel(level=0)
        return TimeseriesLine(
            data=planning, name=self.indicator_name, project=project
        )
    def _make_project_line(self, project):
        """
        Creates a line which contains the required daily speed (houses / day) in a given phase of the project
        given the targeted start date, end date and / or speed of the project. The target information
        is stored at the dictionary project_info.

        The slope is derived from the project speed when speed, total meters and total houses are
        known; otherwise from the number of days between start and end date. When neither is
        possible, or no start date is known, no line is made.

        Args:
            project: key of the project in project_info

        Returns:
            Timeseries line, or None when the project information is insufficient
        """
        info = self.project_info[project]
        start_project = info[self.type_start_date]
        end_project = info[self.type_end_date]
        if end_project:
            end_project = (
                pd.to_datetime(end_project) -
                timedelta(self.n_days_shift_end_date)).strftime("%Y-%m-%d")
        total_houses = info[self.type_total_houses]
        total_meters = info[self.type_total_meters]
        speed_project = info[self.type_speed]

        slope = None
        if speed_project and total_meters and total_houses:
            # speed is divided by 7 -- presumably a weekly speed in meters; confirm with the data owner
            slope = speed_project / 7 * total_houses / total_meters
        elif not speed_project and start_project and end_project and total_houses:
            duration_days = (pd.to_datetime(end_project) -
                             pd.to_datetime(start_project)).days
            # An end date that is not after the start date gives no usable slope
            # (it would divide by zero or produce a negative speed).
            if duration_days > 0:
                slope = total_houses / duration_days

        if start_project and slope and total_houses:
            n_days = total_houses / slope
            if n_days >= 1:
                n_days_int = math.floor(n_days)
                # small correction so that the predicted amount == total amount on the last day
                slope_corrected = slope + (n_days -
                                           n_days_int) * slope / n_days_int
            else:
                # Less than one day of work: flooring would give zero days and a division
                # by zero, so use one day with a scaled rate (same approach as
                # _make_extrapolated_line).
                # NOTE(review): assumes DateDomainRange accepts n_days=0 as a one-day domain.
                n_days_int = 1
                slope_corrected = slope * n_days
            domain = DateDomainRange(begin=start_project,
                                     n_days=n_days_int - 1)
            line = TimeseriesLine(
                data=slope_corrected,
                domain=domain,
                name=self.indicator_name,
                max_value=total_houses,
                project=project,
            )
        else:
            line = None

        return line
 def get_progress_of_realisatie(
     self, df_realisatie, project, this_week, total_units
 ):
     """Return the realised fraction of total_units for a project.

     The realisation data of the project is resampled per week ("W-MON", summed)
     and integrated. The value at this_week is used when that week is in the
     resulting series, otherwise the last value of the series.

     Args:
         df_realisatie: realisation data indexed by project
         project: project to look up in df_realisatie
         this_week: index label of the week to report on
         total_units: value the realised amount is divided by

     Returns:
         The realised amount divided by total_units, or None when the project
         is not in df_realisatie.
     """
     if project not in df_realisatie.index:
         return None

     cumulative_per_week = (
         TimeseriesLine(data=df_realisatie.loc[project])
         .resample(freq="W-MON", method="sum")
         .integrate()
         .make_series()
     )
     if this_week in cumulative_per_week.index:
         realised = cumulative_per_week.loc[this_week]
     else:
         # Week not present: fall back to the most recent cumulative value.
         realised = cumulative_per_week.iloc[-1]
     return realised / total_units
    def calculate_target_rate_line(self):
        """Build the target line, expressed in rate per day.

        The line has the performance norm of the phase as constant value, on a domain
        that starts at the phase start date and spans the phase's number of days.

        Returns:
            TimeseriesLine named "target_indicator" with max_value set to the total
            units of the phase.
        """
        phase = self.phase_data
        norm_rate = phase["performance_norm_unit"]
        target_domain = DateDomainRange(
            begin=phase["start_date"], n_days=phase["n_days"]
        )
        return TimeseriesLine(
            data=norm_rate,
            domain=target_domain,
            name="target_indicator",
            max_value=phase["total_units"],
        )
    def _add_holiday_period(self, line, holiday_period):
        """
        Helper function that inserts a single holiday period into a TimeseriesLine.

        The line is cut at the first day of the holiday period, a zero-valued line
        covering the period is put in between, and the part after the cut is shifted
        by the length of the period.

        Args:
            line: TimeseriesLine to insert the holiday period into
            holiday_period: the days of the holiday period (indexable, with min() and len())

        Returns: TimeseriesLine made of the part before the period, the zero period
            and the shifted remainder

        """
        zero_line = TimeseriesLine(
            domain=DateDomain(begin=holiday_period[0], end=holiday_period[-1]), data=0
        )
        first_holiday = holiday_period.min()
        n_holidays = len(holiday_period)

        head = line.slice(end=first_holiday)
        # Everything from the start of the holiday onwards moves by the holiday length.
        tail = line.slice(begin=first_holiday).translate_x(n_holidays)

        head_with_holiday = head.append(zero_line, skip=1, skip_base=True)
        return head_with_holiday.append(tail)
    def perform(self):
        """
        Main perform: builds a line per project plus one aggregated line for the client.

        Returns: the list of TimeseriesLines when self.return_lines is set, otherwise
            a RecordList with the records made from those lines. Both are empty when
            the aggregated dataframe is empty.

        """
        df = self.aggregate(df=self.apply_business_rules())

        lines = []
        records = RecordList()
        if not df.empty:
            for project, project_data in df.groupby(level=0):
                if not len(project_data):
                    continue
                # A maximum is only known when this indicator has a total-amount type.
                max_value = (
                    self.project_info[project][self.type_total_amount]
                    if self.type_total_amount
                    else None
                )
                project_line = TimeseriesLine(
                    data=project_data.droplevel(0),
                    name=self.indicator_name,
                    max_value=max_value,
                    project=project,
                )
                lines.append(project_line)
                records.append(self.to_record(project_line))

            # The client aggregate is built from the project lines only,
            # before it is added to the list itself.
            client_line = concat(
                lines, name=self.indicator_name, project="client_aggregate"
            )
            lines.append(client_line)
            records.append(self.to_record(client_line))

        return lines if self.return_lines else records
    def _make_project_line(self, project):
        """Creates a line for given project that extrapolates the daily speed of realisation line (HPend / day)
        that has been achieved so far. The extrapolation is based on the average of a set of daily speeds
        in the realisation line (on default the set of speeds at the last 50% of days on which HPend has been realized).
        If too little has been realized on the project for calculation of an average, the extrapolation
        is based on the average speed over all projects.

        Depending on the available information the result is:
        * realisation line and total amount: the realisation line extended with the extrapolation;
        * realisation line without total amount: the realisation line itself;
        * no realisation line, but start date and total amount: a constant line from the start date;
        * otherwise: None.

        Args:
            project: key of the project in project_info

        Returns:
            Timeseries Line, or None when there is not enough information
        """
        start_date = self.project_info[project][self.type_start_date]
        total_amount = self.project_info[project][self.type_total_amount]
        realisation_lines = RealisationHPendIndicator(
            project_info=self.project_info,
            df=self.df[self.df.project == project],
            client=self.client,
            return_lines=True,
            silence=True,
        ).perform()
        # The first returned line is used as the realisation line of the project.
        realisation_rate_line = realisation_lines[0] if realisation_lines else None

        # Default to the client-wide mean rate; use the project's own rate when
        # the project has at least two realised points.
        mean_rate = self.mean_realisation_rate_client
        if realisation_rate_line and len(realisation_rate_line) >= 2:
            mean_rate, _ = realisation_rate_line.integrate().linear_regression(
                data_partition=0.5
            )

        if realisation_rate_line and total_amount:
            extrapolated_rate_line = self._make_extrapolated_line(
                realisation_rate_line, mean_rate, total_amount
            )
            line = realisation_rate_line.append(
                other=extrapolated_rate_line,
                skip=1,
                name=self.indicator_name,
                max_value=total_amount,
                project=project,
            )
        elif realisation_rate_line and not total_amount:
            line = realisation_rate_line
        elif not realisation_rate_line and start_date and total_amount:
            n_days = total_amount / mean_rate
            if n_days >= 1:
                n_days_int = math.floor(n_days)
                # small correction so that the predicted amount == total amount on the last day
                mean_rate_corrected = (
                    mean_rate + (n_days - n_days_int) * mean_rate / n_days_int
                )
            else:
                # Less than one day of work: flooring would give zero days and a division
                # by zero, so use one day with a scaled rate (same approach as
                # _make_extrapolated_line).
                # NOTE(review): assumes DateDomainRange accepts n_days=0 as a one-day domain.
                n_days_int = 1
                mean_rate_corrected = mean_rate * n_days
            domain = DateDomainRange(begin=start_date, n_days=n_days_int - 1)
            line = TimeseriesLine(
                data=mean_rate_corrected,
                domain=domain,
                name=self.indicator_name,
                max_value=total_amount,
                project=project,
            )
        else:
            line = None
        return line
    def test_append_timeseries(self):
        """Check TimeseriesLine.append for adjacent lines and for lines sharing a date."""
        # Case 1: the second line starts the day after the first one ends.
        first = TimeseriesLine(
            pd.Series(index=['2021-01-01', '2021-01-02', '2021-01-03'],
                      data=[1, 2, 3]))
        second = TimeseriesLine(
            pd.Series(index=['2021-01-04', '2021-01-05', '2021-01-06'],
                      data=[3, 4, 5]))
        expected = TimeseriesLine(
            pd.Series(index=[
                '2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
                '2021-01-05', '2021-01-06'
            ],
                      data=[1, 2, 3, 3, 4, 5]))
        pd.testing.assert_series_equal(first.append(second), expected)

        # Case 2: both lines contain 2021-01-03.
        first = TimeseriesLine(
            pd.Series(index=['2021-01-01', '2021-01-02', '2021-01-03'],
                      data=[1, 2, 3]))
        second = TimeseriesLine(
            pd.Series(index=['2021-01-03', '2021-01-04', '2021-01-05'],
                      data=[3, 4, 5]))
        expected = TimeseriesLine(
            pd.Series(index=[
                '2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
                '2021-01-05'
            ],
                      data=[1, 2, 3, 4, 5]))
        # Without skipping, the shared date is expected to be rejected. The call has
        # to happen inside the context manager, otherwise the exception is raised
        # before pytest.raises can catch it.
        with pytest.raises(ValueError):
            first.append(second, skip=0)
        # Skipping the first point of the second line resolves the shared date.
        pd.testing.assert_series_equal(first.append(second, skip=1), expected)