def calculate_poc_ideal_rate_line(self):
    """Calculate the ideal percentage of completion (poc) line as a rate per day.

    The realised poc rate line is extended with the constant daily rate that
    is needed to finish the remaining work by the target deadline. Holidays
    that fall between the end of the realised line and the deadline are
    subtracted from the available days before the rate is computed, and the
    holiday periods are inserted into the resulting line afterwards.

    Three situations are distinguished:
        * work left and time left: spread the work over the remaining
          non-holiday days;
        * work left but the deadline has passed: spread the work over 7 days;
        * no work left: the ideal line equals the realised line.

    Returns:
        poc ideal rate line (object)
    """
    poc_real_rate_line = self.calculate_poc_real_rate_line()
    target_rate_line = self.calculate_target_rate_line()
    remaining_work = poc_real_rate_line.distance_to_max_value()
    remaining_days = poc_real_rate_line.daysleft(end=target_rate_line.domain.end)

    work_left = remaining_work > 0
    if work_left and remaining_days > 0:
        # normal case: work left and time left before the target deadline
        full_period = DateDomain(
            begin=poc_real_rate_line.domain.end, end=target_rate_line.domain.end
        )
        n_holidays = self.count_holidays_in_date_range(
            self.holiday_periods, full_period.domain
        )
        # the holidays are re-inserted below, so shorten the domain by them here
        working_period = DateDomain(
            begin=poc_real_rate_line.domain.end,
            end=target_rate_line.domain.end - timedelta(n_holidays),
        )
        required_rate = remaining_work / (remaining_days - n_holidays)
        extension = TimeseriesLine(data=required_rate, domain=working_period)
        line = poc_real_rate_line.append(extension, skip=1)
    elif work_left and remaining_days <= 0:
        # exception: work left but the target deadline has already passed;
        # production needs to be finished within a week
        required_rate = remaining_work / 7
        catch_up_period = DateDomain(
            begin=poc_real_rate_line.domain.end,
            end=pd.Timestamp.now() + timedelta(7),
        )
        extension = TimeseriesLine(data=required_rate, domain=catch_up_period)
        line = poc_real_rate_line.append(extension, skip=1)
    else:
        # no more work to do, so ideal line == realised line
        line = poc_real_rate_line

    # only holidays outside the realised part of the line are added
    future_holidays = self.slice_holiday_periods(
        holiday_periods=self.holiday_periods,
        periods_to_remove=poc_real_rate_line.domain.domain,
    )
    line = self.add_holiday_periods_to_line(line, future_holidays)
    line.name = "poc_ideal_indicator"
    line.max_value = self.phase_data["total_units"]
    return line
def perform(self):
    """Count orders per project, afsluitcode and afsluitdatum and turn them into records.

    After the business rules are applied, the orders are counted per
    (project, afsluitcode, afsluitdatum). For every project / afsluitcode
    combination the counts are wrapped in a TimeseriesLine and converted to
    a record.

    Returns:
        RecordList with one record per project / afsluitcode combination,
        ready to be written to the firestore.
    """
    counts = self.aggregate(
        df=self.apply_business_rules(),
        by=["project", "afsluitcode", "afsluitdatum"],
        agg_function={"order_nummer": "count"},
    )

    records = RecordList()
    for project in counts.index.get_level_values(level=0).unique():
        # after selecting the project, afsluitcode is the first index level
        counts_of_project = counts.loc[project]
        codes = counts_of_project.index.get_level_values(level=0).unique()
        for afsluitcode in codes:
            series = pd.Series(counts_of_project.loc[afsluitcode]["order_nummer"])
            line_project = TimeseriesLine(
                data=series,
                name=self.graph_name,
                max_value=None,
                project=project,
            )
            records.append(self.to_record(line_project, afsluitcode))
    return records
def _make_extrapolated_line(self, realisation_rate_line, mean_rate, total_amount):
    """Build the line that continues the realised line at the average speed.

    The extrapolation starts at the end of the realised line and runs for as
    many whole days as are needed to realise the remaining amount at
    ``mean_rate``. The rate is adjusted slightly so that the extrapolated
    amount adds up to the remaining amount on the last day.

    Args:
        realisation_rate_line: realised timeseries line calculated with the
            RealisationHPendIndicator.
        mean_rate: the average speed for extrapolation.
        total_amount: the total number of houses to be realised in the project.

    Returns:
        Extrapolated timeseries line
    """
    realised_amount = realisation_rate_line.integrate().get_most_recent_point()
    remaining_amount = total_amount - realised_amount
    n_days = remaining_amount / mean_rate

    if n_days >= 1:
        whole_days = math.floor(n_days)
        # spread the fractional last day over the whole days so that the
        # predicted amount == total amount on the last day
        rate = mean_rate + (n_days - whole_days) * mean_rate / whole_days
    else:
        # less than one day of work left: realise the remainder in one day
        whole_days = 1
        rate = mean_rate * n_days

    return TimeseriesLine(
        data=rate,
        domain=DateDomainRange(
            begin=realisation_rate_line.domain.end, n_days=whole_days
        ),
    )
def calculate_poc_verwacht_rate_line(self):
    """Calculate the expected percentage of completion (poc) line as a rate per day.

    The realised poc rate line is extended with a daily rate derived from
    past performance: the slope of an extrapolation of the cumulative
    realised line (``data_partition=0.5``). When that slope is zero, the
    performance norm of the phase is used instead. Holiday periods outside
    the realised part of the line are inserted afterwards.

    Returns:
        poc verwacht (expected) rate line (object)
    """
    poc_real_rate_line = self.calculate_poc_real_rate_line()

    cumulative_line = poc_real_rate_line.integrate()
    expected_rate = cumulative_line.extrapolate(data_partition=0.5).slope
    if expected_rate == 0:
        # not enough realised data points: fall back to the ideal speed
        expected_rate = self.phase_data["performance_norm_unit"]

    remaining_work = poc_real_rate_line.distance_to_max_value()
    remaining_days = poc_real_rate_line.daysleft(slope=expected_rate)

    if remaining_work > 0:
        # work left: extend the realised line at the expected rate
        extension = TimeseriesLine(
            data=expected_rate,
            domain=DateDomainRange(
                begin=poc_real_rate_line.domain.end, n_days=remaining_days
            ),
        )
        line = poc_real_rate_line.append(extension, skip=1)
    else:
        # nothing left to do: expected line == realised line
        line = poc_real_rate_line

    future_holidays = self.slice_holiday_periods(
        self.holiday_periods, poc_real_rate_line.domain.domain
    )
    line = self.add_holiday_periods_to_line(line, future_holidays)
    line.name = "poc_verwacht_indicator"
    line.max_value = self.phase_data["total_units"]
    return line
def create_line(value):
    """Create a timeseries line from a single data point, on today's date.

    Args:
        value: value to be made into a timeseries line

    Returns:
        a TimeseriesLine with index today and one value
    """
    # `pd.datetime` was deprecated in pandas 1.0 and removed in pandas 2.0.
    # The timestamp is taken once so that begin and end of the domain are
    # exactly equal instead of a few microseconds apart.
    today = pd.Timestamp.today()
    domain = DateDomain(today, today)
    return TimeseriesLine(domain=domain, data=value)
def calculate_poc_real_rate_line(self):
    """Calculate the realised percentage of completion (poc) line as a rate per day.

    The line is built from the column of ``self.df`` that belongs to this
    phase (``phase_data["phase_column"]``); its maximum value is the total
    number of units of the phase.

    Returns:
        poc real rate line (object)
    """
    phase = self.phase_data
    return TimeseriesLine(
        data=self.df[phase["phase_column"]],
        name="poc_real_indicator",
        max_value=phase["total_units"],
    )
def _make_project_line(self, project, df):
    """Create the TimeseriesLine of a single project from its planning data.

    Args:
        project (str): project name
        df (pd.DataFrame): dataframe containing planning of a project; its
            first index level is dropped before the line is built.

    Returns:
        TimeseriesLine with planning for the project
    """
    return TimeseriesLine(
        data=df.droplevel(level=0),
        name=self.indicator_name,
        project=project,
    )
def _make_project_line(self, project):
    """Create the target line with the required daily speed (houses / day).

    The speed follows from the target information stored in
    ``self.project_info[project]``:

        * when a speed, total meters and total houses are known, the speed
          is divided by 7 and scaled by houses per meter;
        * otherwise, when start date, end date and total houses are known,
          the houses are spread evenly over the days between start and end
          (the end date is first shifted back by ``n_days_shift_end_date``).

    The line starts at the start date and runs for the whole number of days
    needed to realise all houses; the speed is adjusted slightly so that the
    cumulative amount equals the total amount on the last day.

    Args:
        project: key of the project in ``self.project_info``

    Returns:
        Timeseries line, or None when the project information is
        insufficient (no start date, no total houses, no usable speed, or an
        end date that does not lie after the start date).
    """
    info = self.project_info[project]
    start_project = info[self.type_start_date]
    end_project = info[self.type_end_date]
    if end_project:
        end_project = (
            pd.to_datetime(end_project) - timedelta(self.n_days_shift_end_date)
        ).strftime("%Y-%m-%d")
    total_houses = info[self.type_total_houses]
    total_meters = info[self.type_total_meters]
    speed_project = info[self.type_speed]

    slope = None
    if speed_project and total_meters and total_houses:
        slope = speed_project / 7 * total_houses / total_meters
    elif not speed_project and start_project and end_project and total_houses:
        duration_days = (
            pd.to_datetime(end_project) - pd.to_datetime(start_project)
        ).days
        # a zero or negative duration cannot yield a meaningful speed and
        # would otherwise raise ZeroDivisionError or give a negative slope
        if duration_days > 0:
            slope = total_houses / duration_days

    if not (start_project and slope and total_houses):
        return None

    n_days = total_houses / slope
    if n_days >= 1:
        n_days_int = math.floor(n_days)
        # small correction so that the predicted amount == total amount on the last day
        slope_corrected = slope + (n_days - n_days_int) * slope / n_days_int
    else:
        # less than one day of work: realise everything on a single day
        # instead of dividing by zero (same rule as _make_extrapolated_line)
        n_days_int = 1
        slope_corrected = slope * n_days

    domain = DateDomainRange(begin=start_project, n_days=n_days_int - 1)
    return TimeseriesLine(
        data=slope_corrected,
        domain=domain,
        name=self.indicator_name,
        max_value=total_houses,
        project=project,
    )
def get_progress_of_realisatie(
    self, df_realisatie, project, this_week, total_units
):
    """Return the realised fraction of ``total_units`` for a project.

    The realisation data of the project is resampled to weeks ("W-MON",
    summed) and accumulated. The cumulative value at ``this_week`` is used
    when that week is present, otherwise the last available cumulative
    value.

    Args:
        df_realisatie: realisation data, indexed by project.
        project: project to calculate the progress for.
        this_week: index label of the week to read the progress at.
        total_units: total number of units, used as the denominator.

    Returns:
        Cumulative realisation divided by ``total_units``, or None when the
        project does not occur in ``df_realisatie``.
    """
    if project not in df_realisatie.index:
        return None

    weekly_line = TimeseriesLine(data=df_realisatie.loc[project]).resample(
        freq="W-MON", method="sum"
    )
    cumulative = weekly_line.integrate().make_series()

    if this_week in cumulative.index:
        realised = cumulative.loc[this_week]
    else:
        # requested week not available: use the most recent cumulative value
        realised = cumulative.iloc[-1]
    return realised / total_units
def calculate_target_rate_line(self):
    """Calculate the target line as a rate per day.

    The line has the constant value of the performance norm and spans
    ``n_days`` days from the start date, all as specified in the phase data.

    Returns:
        target rate line (object)
    """
    phase = self.phase_data
    target_domain = DateDomainRange(
        begin=phase["start_date"], n_days=phase["n_days"]
    )
    return TimeseriesLine(
        data=phase["performance_norm_unit"],
        domain=target_domain,
        name="target_indicator",
        max_value=phase["total_units"],
    )
def _add_holiday_period(self, line, holiday_period):
    """Insert a single holiday period with zero activity into a TimeseriesLine.

    The line is cut at the first day of the holiday period. A line of zeros
    covering the holiday is appended to the part before the cut, followed by
    the part after the cut shifted by the length of the holiday period.

    Args:
        line: TimeseriesLine to insert the holiday period into.
        holiday_period: the dates of the holiday period; its first and last
            element are used as the bounds of the zero line.

    Returns:
        TimeseriesLine with the holiday period inserted.
    """
    zero_line = TimeseriesLine(
        data=0,
        domain=DateDomain(begin=holiday_period[0], end=holiday_period[-1]),
    )
    holiday_start = holiday_period.min()
    part_before = line.slice(end=holiday_start)
    # everything from the holiday start onwards moves back by the holiday length
    part_after = line.slice(begin=holiday_start).translate_x(len(holiday_period))
    with_holiday = part_before.append(zero_line, skip=1, skip_base=True)
    return with_holiday.append(part_after)
def perform(self):
    """Calculate the lines of this indicator per project and for the client.

    The aggregated data is split per project (first index level). Every
    non-empty project becomes a TimeseriesLine and a record. When there is
    data at all, the project lines are additionally concatenated into one
    line for the project "client_aggregate".

    Returns:
        The list of lines when ``self.return_lines`` is set, otherwise the
        RecordList with the lines per project.
    """
    df = self.aggregate(df=self.apply_business_rules())
    line_list = []
    record_list = RecordList()

    if not df.empty:
        for project, timeseries in df.groupby(level=0):
            if not len(timeseries):
                continue
            max_value = (
                self.project_info[project][self.type_total_amount]
                if self.type_total_amount
                else None
            )
            line_project = TimeseriesLine(
                data=timeseries.droplevel(0),
                name=self.indicator_name,
                max_value=max_value,
                project=project,
            )
            line_list.append(line_project)
            record_list.append(self.to_record(line_project))

        # aggregate of all project lines for the client as a whole
        line_client = concat(
            line_list, name=self.indicator_name, project="client_aggregate"
        )
        line_list.append(line_client)
        record_list.append(self.to_record(line_client))

    if self.return_lines:
        return line_list
    return record_list
def _make_project_line(self, project):
    """Create the line that extrapolates the realised daily speed of a project.

    The realised line (HPend / day) is calculated with the
    RealisationHPendIndicator and extended at an average speed until the
    total amount of the project is reached. The average speed is the slope
    of a linear regression on the cumulative realised line
    (``data_partition=0.5``) when the realised line has at least two points;
    otherwise the mean realisation rate of the client is used.

    Outcomes:
        * realised line and total amount known: realised line extended with
          the extrapolation;
        * realised line but no total amount: the realised line itself;
        * no realised line, but start date and total amount known: a line
          from the start date at the client's mean rate;
        * otherwise: None.

    Args:
        project: key of the project in ``self.project_info``

    Returns:
        Timeseries line, or None when there is too little information.
    """
    start_date = self.project_info[project][self.type_start_date]
    total_amount = self.project_info[project][self.type_total_amount]

    realisation_lines = RealisationHPendIndicator(
        project_info=self.project_info,
        df=self.df[self.df.project == project],
        client=self.client,
        return_lines=True,
        silence=True,
    ).perform()
    # the first returned line is used as the line of the project itself
    realisation_rate_line = realisation_lines[0] if realisation_lines else None

    mean_rate = self.mean_realisation_rate_client
    if realisation_rate_line:
        if len(realisation_rate_line) >= 2:
            mean_rate, _ = realisation_rate_line.integrate().linear_regression(
                data_partition=0.5
            )

    if realisation_rate_line and total_amount:
        extrapolated_rate_line = self._make_extrapolated_line(
            realisation_rate_line, mean_rate, total_amount
        )
        line = realisation_rate_line.append(
            other=extrapolated_rate_line,
            skip=1,
            name=self.indicator_name,
            max_value=total_amount,
            project=project,
        )
    elif realisation_rate_line and not total_amount:
        line = realisation_rate_line
    elif not realisation_rate_line and start_date and total_amount:
        n_days = total_amount / mean_rate
        if n_days >= 1:
            n_days_int = math.floor(n_days)
            # small correction so that the predicted amount == total amount on the last day
            mean_rate_corrected = (
                mean_rate + (n_days - n_days_int) * mean_rate / n_days_int
            )
        else:
            # less than one day of work: realise everything on a single day
            # instead of dividing by zero (same rule as _make_extrapolated_line)
            n_days_int = 1
            mean_rate_corrected = mean_rate * n_days
        domain = DateDomainRange(begin=start_date, n_days=n_days_int - 1)
        line = TimeseriesLine(
            data=mean_rate_corrected,
            domain=domain,
            name=self.indicator_name,
            max_value=total_amount,
            project=project,
        )
    else:
        line = None
    return line
def test_append_timeseries(self):
    """Check TimeseriesLine.append for adjacent and for overlapping lines.

    Adjacent lines are expected to be concatenated as they are. For lines
    that share a date, appending with ``skip=0`` is expected to raise a
    ValueError, while ``skip=1`` is expected to drop the shared point of the
    appended line.
    """
    # adjacent lines: the second line starts the day after the first ends
    timeseries1 = TimeseriesLine(
        pd.Series(index=['2021-01-01', '2021-01-02', '2021-01-03'], data=[1, 2, 3]))
    timeseries2 = TimeseriesLine(
        pd.Series(index=['2021-01-04', '2021-01-05', '2021-01-06'], data=[3, 4, 5]))
    timeseries_result = TimeseriesLine(
        pd.Series(index=[
            '2021-01-01', '2021-01-02', '2021-01-03',
            '2021-01-04', '2021-01-05', '2021-01-06'
        ], data=[1, 2, 3, 3, 4, 5]))
    pd.testing.assert_series_equal(timeseries1.append(timeseries2), timeseries_result)

    # overlapping lines: both lines contain 2021-01-03
    timeseries1 = TimeseriesLine(
        pd.Series(index=['2021-01-01', '2021-01-02', '2021-01-03'], data=[1, 2, 3]))
    timeseries2 = TimeseriesLine(
        pd.Series(index=['2021-01-03', '2021-01-04', '2021-01-05'], data=[3, 4, 5]))
    timeseries_result = TimeseriesLine(
        pd.Series(index=[
            '2021-01-01', '2021-01-02', '2021-01-03',
            '2021-01-04', '2021-01-05'
        ], data=[1, 2, 3, 4, 5]))
    # the call has to happen inside the context manager; passing the result
    # of the call to pytest.raises evaluates it before pytest can catch it
    with pytest.raises(ValueError):
        timeseries1.append(timeseries2, skip=0)
    pd.testing.assert_series_equal(timeseries1.append(timeseries2, skip=1),
                                   timeseries_result)