def perform(self):
    """
    Main loop that applies business rules, and creates DictRecord for all
    projects in dataframe.

    Returns:
        DictRecord with ratios per project and the overall ratio.
    """
    records = RecordList()
    df = self.apply_business_rules()
    if not df.empty:
        # Overall ratio is explicitly guarded against a zero denominator.
        ratio = (df.leverbetrouwbaar.sum() / df.hpend.sum()
                 if df.hpend.sum() != 0 else 0)
        aggregate_line = self.create_line(ratio)
        records.append(self.to_record("client_aggregate", aggregate_line))
        df = super().aggregate(
            df,
            by="project",
            agg_function={
                "hpend": "sum",
                "leverbetrouwbaar": "sum"
            },
        )
        # Per-project ratio: a zero hpend yields NaN (0/0) or +/-inf (x/0)
        # in pandas. fillna(0) alone misses the inf case, so map inf to 0 as
        # well — mirroring the zero-guard used for the client aggregate.
        df["ratio"] = df["leverbetrouwbaar"] / df["hpend"]
        df["ratio"] = (
            df["ratio"].replace([float("inf"), float("-inf")], 0).fillna(0)
        )
        for project in set(df.index):
            project_line = self.create_line(df.loc[project, "ratio"])
            records.append(self.to_record(project, project_line))
    return records
def perform(self):
    """
    Aggregate to clusters and retrieve the counts, then make the result into
    records per project, per cluster.

    Returns:
        RecordList with a record for every project, per cluster, including
        provider level aggregates.
    """
    business_df = self.apply_business_rules()
    counts = self.aggregate(
        df=business_df,
        by=["project", "cluster_redenna", "hasdatum"],
        agg_function="size",
    )
    records = RecordList()
    # Index levels: 0 = project, 1 = cluster_redenna, 2 = hasdatum.
    for project, per_project in counts.groupby(level=0):
        for cluster, cluster_counts in per_project.groupby(level=1):
            if not len(cluster_counts):
                continue
            line = TimeseriesDistanceLine(cluster_counts.droplevel([0, 1]))
            records.append(
                self.to_record(line=line, project=project, cluster=cluster)
            )
    return self.make_provider_lines(business_df, records)
def perform(self):
    """
    Aggregate to clusters and retrieve the counts of connection types per
    project, then make the result into a record.

    Returns:
        Record ready to be written to the firestore, containing clustered data.
    """
    aggregated = self.aggregate(
        df=self.apply_business_rules(),
        by=["project", "afsluitcode", "afsluitdatum"],
        agg_function={"order_nummer": "count"},
    )
    record_list = RecordList()
    # Index levels: 0 = project, then (afsluitcode, afsluitdatum) per project.
    for project in aggregated.index.get_level_values(level=0).unique():
        per_project = aggregated.loc[project]
        for afsluitcode in per_project.index.get_level_values(level=0).unique():
            counts = pd.Series(per_project.loc[afsluitcode]["order_nummer"])
            line = TimeseriesLine(
                data=counts,
                name=self.graph_name,
                max_value=None,
                project=project,
            )
            record_list.append(self.to_record(line, afsluitcode))
    return record_list
def perform(self):
    """
    Main perform to do all necessary calculations for indicator.

    Returns:
        List of Records with lines per project and client_line for overall
        planning.
    """
    df = self.aggregate(
        df=self.apply_business_rules(),
        by=["project", "hasdatum"],
        agg_function="size",
    )
    line_list = []
    record_list = RecordList()
    if not df.empty:
        # Note: loop variable renamed so the aggregated frame is not shadowed.
        for project, project_df in df.groupby(level=0):
            if not len(project_df):
                continue
            line_project = self._make_project_line(project=project, df=project_df)
            record_list.append(self.to_record(line_project))
            line_list.append(line_project)
        line_client = concat(
            line_list, name=self.indicator_name, project="client_aggregate"
        )
        record_list.append(self.to_record(line_client))
    return record_list
def perform(self):
    """
    Main perform to do all necessary calculations for indicator.

    Returns:
        List of Records with line per project.
    """
    df = self.apply_business_rules()
    line_list = []
    record_list = RecordList()
    if not df.empty:
        # Note: loop variable renamed so the source frame is not shadowed.
        for project, project_df in df.groupby(level=0):
            if not len(project_df):
                continue
            line_project = self._make_project_line(project=project, df=project_df)
            line_list.append(line_project)
            record_list.append(self.to_record(line_project))
        line_client = concat(
            line_list, name=self.indicator_name, project="client_aggregate"
        )
        record_list.append(self.to_record(line_client))
    return record_list
class RecordListWrapper(MutableMapping):
    """A Dictionary that holds all records for an analysis"""

    def __setitem__(self, k, v) -> None:
        # Mapping mutation is intentionally a no-op; use add() instead.
        pass

    def __delitem__(self, v) -> None:
        # Deletion is intentionally unsupported (no-op).
        pass

    def __getitem__(self, k):
        # Lookup by key is not implemented; always returns None.
        pass

    def __len__(self) -> int:
        return len(self.record_list)

    def __iter__(self):
        return self.record_list.__iter__()

    def __init__(self, client, record_collection=None):
        """
        Args:
            client: name of the client the records belong to.
            record_collection: optional mapping of key -> Record used to
                pre-populate the wrapper.

        Raises:
            ValueError: when record_collection contains a non-Record value.
        """
        self.record_list = RecordList()
        self.client_name = client
        if record_collection:
            for key, record in record_collection.items():
                if isinstance(record, Record):
                    # Bug fix: records are stored on record_list. The previous
                    # code assigned into ``self.record_collection``, an
                    # attribute that was never created, raising AttributeError.
                    self.record_list.append(record)
                else:
                    raise ValueError(f"record collection must contain records,"
                                     f"{key} contains an object of type: {type(record)}")

    def add(self, key, record, record_type, collection, **kwargs):
        """
        Function to add data to the RecordListWrapper. Will create a record of
        given type and add it to the collection.

        Args:
            key: Name of Record
            record: Data that will be made into a record
            record_type: type of Record that the data will be turned into.
            collection: Collection the record should be part of.
            **kwargs:
        """
        record = record_type(record, collection, self.client_name, key, **kwargs)
        self.record_list.append(record)

    def to_firestore(self):
        """
        Calls the to_firestore function of all objects in its collection,
        writing the entire collection to the firestore.
        """
        self.record_list.to_firestore()
def perform(self):
    """
    Applies business rules and aggregates the result, then creates a record
    per project per time constraint, plus a client aggregate per time
    constraint.

    Returns:
        RecordList with one record per (project, time constraint) and one
        "client_aggregate" record per time constraint.
    """
    series = self.aggregate(df=self.apply_business_rules())
    records = RecordList()
    for project, time_values in series.iterrows():
        # .items() replaces .iteritems(), which was deprecated and removed
        # in pandas 2.0; behavior is identical.
        for time_constraint, value in time_values.items():
            project_time_line = self.create_line(value)
            records.append(
                self.to_record(project, project_time_line, time_constraint))
    for time_constraint, values in series.items():
        aggregate_line = self.create_line(values.sum())
        records.append(
            self.to_record("client_aggregate", aggregate_line, time_constraint))
    return records
def perform(self):
    """
    Main loop that applies business rules, aggregates resulting frame, and
    creates records for all projects in dataframe.

    Returns:
        RecordList with actual numbers for every project. Provider total is
        added in to_record.
    """
    aggregated = self.aggregate(df=self.apply_business_rules())
    records = RecordList()
    for project, row in aggregated.iterrows():
        records.append(self.to_record(project, self.create_line(row)))
    # Column-wise totals over all projects form the client aggregate.
    total_line = self.create_line(aggregated.sum())
    records.append(self.to_record("client_aggregate", total_line))
    return records
def perform(self):
    """Adds for each project a record to the record_list that contains the
    line made at _make_project_line.

    Returns:
        List of records which are ready for sending to the firestore.
    """
    project_lines = []
    record_list = RecordList()
    for project in self.project_info:
        line = self._make_project_line(project)
        if not line:
            continue
        project_lines.append(line)
        record_list.append(self.to_record(line))
    # Only build the client aggregate when at least one project produced a line.
    if project_lines:
        client_line = concat(
            project_lines, name=self.indicator_name, project="client_aggregate"
        )
        project_lines.append(client_line)
        record_list.append(self.to_record(client_line))
    return record_list
def perform(self):
    """
    Main perform function for the FinanceIndicator

    Returns:
        Recordlist with relevant records for the finance analyse
    """
    record_list = RecordList()
    for project in self.budget.project_naam.unique():
        actuals, budget = self.apply_business_rules(project)
        # Skip projects for which either side of the comparison is missing.
        if actuals.empty or budget.empty:
            continue
        actuals_aggregated = self.aggregate(actuals)
        record = {
            'budget': budget.to_dict(orient='records'),
            'actuals': actuals.to_dict(orient='records'),
            'actuals_aggregated': actuals_aggregated.to_dict(orient='records'),
        }
        record_list.append(
            self.to_record(record=record,
                           collection='Finance',
                           graph_name='finance_baan',
                           project=project))
    return record_list
def perform(self):
    """
    Main perform to do all necessary calculations for indicator.

    Returns:
        List of Records with lines per project.
    """
    df = self.aggregate(df=self.apply_business_rules())
    line_list = []
    record_list = RecordList()
    if not df.empty:
        # Index level 0 is the project name; the remaining level(s) form the
        # per-project timeseries.
        for project, timeseries in df.groupby(level=0):
            if len(timeseries):
                # type_total_amount selects which total from project_info caps
                # the line; falsy means no maximum.
                if self.type_total_amount:
                    max_value = self.project_info[project][self.type_total_amount]
                else:
                    max_value = None
                line_project = TimeseriesLine(
                    data=timeseries.droplevel(0),
                    name=self.indicator_name,
                    max_value=max_value,
                    project=project,
                )
                line_list.append(line_project)
                record_list.append(self.to_record(line_project))
        # Client aggregate is the concatenation of all project lines.
        line_client = concat(
            line_list, name=self.indicator_name, project="client_aggregate"
        )
        line_list.append(line_client)
        record_list.append(self.to_record(line_client))
    # return_lines switches between returning raw line objects (for reuse by
    # other indicators) and firestore-ready records.
    if self.return_lines:
        return line_list
    else:
        return record_list
class FttXIndicatorAnalyse(FttXBase):
    """Analysis step that runs every FttX indicator and collects the produced
    records on ``self.records``."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Accumulates all records produced by analyse().
        self.records = RecordList()

    def analyse(self):
        """Performs an indicator and collects the records into records."""
        df = self.transformed_data.df
        project_info = self.transformed_data.project_info
        self.records.append(self._set_filters(client=self.client))
        self.records.append(self._calculate_list_of_years(client=self.client))
        self.records.append(self._progress_per_phase_over_time_for_finance())
        self.records.append(self._progress_per_phase_for_finance())
        self.records.append(
            RedenNaIndicator(df=df, client=self.client).perform())
        self.records.append(
            ActualStatusBarChartIndicator(df=df, client=self.client).perform())
        self.records.append(
            WerkvoorraadIndicator(df=df, client=self.client).perform())
        self.records.append(
            RealisationHPcivielIndicator(df=df,
                                         project_info=project_info,
                                         client=self.client).perform())
        self.records.append(
            PerformanceGraphIndicator(
                df=df,
                project_info=self.transformed_data.project_info,
                client=self.client,
            ).perform())
        self.records.append(
            LeverbetrouwbaarheidIndicator(
                df=df,
                client=self.client,
            ).perform())
        self.records.append(
            InternalTargetHPendIndicator(project_info=project_info,
                                         client=self.client).perform())
        self.records.append(
            RealisationHPendIndicator(df=df,
                                      project_info=project_info,
                                      client=self.client).perform())

    def _set_filters(self, client):
        """
        Sets the set of projects that should be shown in the dashboard as
        record, so that it can be retrieved from the firestore.
        """
        return ListRecord(
            record=create_project_filter(self.transformed_data.df),
            graph_name="project_names",
            collection="Data",
            client=client,
        )

    def _progress_per_phase_over_time_for_finance(self):
        """
        This function calculates the progress per phase over time base on the
        specified columns per phase:
            'opleverdatum': 'has',
            'schouwdatum': 'schouwen',
            'laswerkapgereed_datum': 'montage ap',
            'laswerkdpgereed_datum': 'montage dp',
            'status_civiel_datum': 'civiel'

        Adds a record consisting of dict per project holding a timeindex with
        progress per phase
        """
        logger.info("Calculating project progress per phase over time")
        document_list = []
        for project, df in self.transformed_data.df.groupby("project"):
            if df.empty:
                continue
            columns = [
                "opleverdatum",
                "schouwdatum",
                "laswerkapgereed_datum",
                "laswerkdpgereed_datum",
                "status_civiel_datum",
                "laswerkapgereed",
                "laswerkdpgereed",
            ]
            date_df = df.loc[:, columns]
            # Montage only counts when both DP and AP laswerk are finished;
            # the montage date is then the later of the two dates.
            mask = br.laswerk_dp_gereed(df) & br.laswerk_ap_gereed(df)
            date_df["montage"] = np.datetime64("NaT")
            date_df.loc[mask, "montage"] = date_df[[
                "laswerkapgereed_datum", "laswerkdpgereed_datum"
            ]][mask].max(axis=1)
            date_df = date_df.drop(
                columns=["laswerkapgereed", "laswerkdpgereed"])
            # Daily counts per phase, accumulated over time and normalized by
            # the project size -> fraction completed per day per phase.
            progress_over_time: pd.DataFrame = date_df.apply(
                pd.value_counts).resample("D").sum().cumsum() / len(df)
            progress_over_time.index = progress_over_time.index.strftime(
                "%Y-%m-%d")
            progress_over_time.rename(
                columns={
                    "opleverdatum": "has",
                    "schouwdatum": "schouwen",
                    "laswerkapgereed_datum": "montage ap",
                    "laswerkdpgereed_datum": "montage dp",
                    "status_civiel_datum": "civiel",
                },
                inplace=True,
            )
            record = progress_over_time.to_dict()
            document_list.append(
                dict(
                    client=self.client,
                    project=project,
                    data_set="progress_over_time",
                    record=record,
                ))
        return DocumentListRecord(
            record=document_list,
            client=self.client,
            collection="Data",
            graph_name="Progress_over_time",
            document_key=["client", "project", "data_set"],
        )

    def _progress_per_phase_for_finance(self):
        """
        Calculates the progress per phase for the phases civiel, montage,
        schouwen, hc, hp and hp end, as well as the totals per project. These
        results are put in a record and added to the records attribute of the
        class.
        """
        logger.info("Calculating project progress per phase")
        # Each business-rule call yields a boolean Series; summed per project
        # below they become counts per phase.
        progress_df = pd.concat(
            [
                self.transformed_data.df.project,
                ~self.transformed_data.df.sleutel.isna(),
                br.bis_opgeleverd(self.transformed_data.df),
                br.laswerk_dp_gereed(self.transformed_data.df)
                & br.laswerk_ap_gereed(self.transformed_data.df),
                br.geschouwd(self.transformed_data.df),
                br.hc_opgeleverd(self.transformed_data.df),
                br.hp_opgeleverd(self.transformed_data.df),
                br.hpend(self.transformed_data.df),
            ],
            axis=1,
        )
        progress_df.columns = [
            "project",
            "totaal",
            "civiel",
            "montage",
            "schouwen",
            "hc",
            "hp",
            "hpend",
        ]
        documents = [
            dict(project=project,
                 client=self.client,
                 data_set="progress",
                 record=values)
            for project, values in progress_df.groupby(
                "project").sum().to_dict(orient="index").items()
        ]
        return DocumentListRecord(
            record=documents,
            client=self.client,
            collection="Data",
            graph_name="Progress",
            document_key=["client", "project", "data_set"],
        )

    def _calculate_list_of_years(self, client):
        """
        Calculates a list of years per client based on the dates that are
        found in the date columns.
        """
        logger.info("Calculating list of years")
        date_columns = self.transformed_data.datums
        dc_data = self.transformed_data.df.loc[:, date_columns]
        list_of_years = []
        for col in dc_data.columns:
            list_of_years += list(
                dc_data[col].dropna().dt.year.unique().astype(str))
        # Deduplicate across columns and return the years sorted as strings.
        list_of_years = sorted(list(set(list_of_years)))
        return Record(
            record=list_of_years,
            collection="Data",
            graph_name="List_of_years",
            client=client,
        )
class PhaseCapacity:
    def __init__(
        self,
        df: pd.DataFrame,
        phase_data: dict,
        client: str,
        project: str,
        holiday_periods: list,
        poc_ideal_rate_line_masterphase=None,
        masterphase_data=None,
    ):
        """This class enables to calculate and make records of all lines of a
        phase required for the capacity algorithm for a given project.

        Args:
            df (pd.DataFrame): contains the complete set of historical data
                within a project that is relevant for the capacity algorithm.
            phase_data (dict): contains attributes of the phase such as its
                start date.
            client (str): specifies to which client the phase belongs.
            project (str): specifies to which project the phase belongs.
            holiday_periods (list): specifies the holiday periods that apply
                to the project.
            poc_ideal_rate_line_masterphase (object): is an object of the poc
                ideal rate line of the phase that determines the work_stock for
                this phase. Defaults to None.
            masterphase_data ([type], optional): contains attributes of the
                phase that controls the work_stock for this phase.
                Defaults to None.
        """
        self.df = df
        self.phase_data = phase_data
        self.client = client
        self.project = project
        self.holiday_periods = holiday_periods
        self.poc_ideal_rate_line_masterphase = poc_ideal_rate_line_masterphase
        self.masterphase_data = masterphase_data
        self.record_list = RecordList()

    def algorithm(self):
        """
        This functions calculates the lines required for this phase and joins
        them in a record list. The logic that is applied per line is specified
        in the line specific function.

        Returns:
            PhaseCapacity (object): for method chaining
        """
        lines = []
        lines.append(self.calculate_target_rate_line())
        lines.append(self.calculate_poc_real_rate_line())
        lines.append(self.calculate_poc_ideal_rate_line())
        lines.append(self.calculate_poc_verwacht_rate_line())
        # Work stock lines only exist when a master phase is supplied.
        if self.poc_ideal_rate_line_masterphase:
            lines.append(self.calculate_work_stock_rate_line())
            lines.append(self.calculate_work_stock_amount_line())
        # Plain loop instead of a side-effect list comprehension.
        for line in lines:
            self.line_to_record(line)
        return self

    def calculate_target_rate_line(self):
        """This functions calculates the target line expressed in rate per day.
        The line is based on the start date, number of days and performance
        norm as specified at phase data.

        Returns:
            target rate line (object)
        """
        intercept = self.phase_data["performance_norm_unit"]
        domain = DateDomainRange(
            begin=self.phase_data["start_date"], n_days=self.phase_data["n_days"]
        )
        line = TimeseriesLine(
            data=intercept,
            domain=domain,
            name="target_indicator",
            max_value=self.phase_data["total_units"],
        )
        return line

    def calculate_poc_real_rate_line(self):
        """This function calculates the percentage of completion (poc) line
        given what has been realised so far. This line is is expressed in rate
        per day. The line is based on the historical data of this phase in the
        given project.

        Returns:
            poc real rate line (object)
        """
        ds = self.df[self.phase_data["phase_column"]]
        line = TimeseriesLine(
            data=ds, name="poc_real_indicator", max_value=self.phase_data["total_units"]
        )
        return line

    def calculate_poc_ideal_rate_line(self):
        """This function calculates the percentage of completion (poc) line
        given what has been realised so far and what still needs to be done to
        make the target deadline. This line is expressed in rate per day.

        The line is based on the poc real rate line and is extended with the
        daily rate that is required to make the target deadline. In the
        calculation of the required daily rate also holiday periods with zero
        activity are taken into account.

        Returns:
            poc ideal rate line (object)
        """
        poc_real_rate_line = self.calculate_poc_real_rate_line()
        target_rate_line = self.calculate_target_rate_line()
        distance_to_max_value = poc_real_rate_line.distance_to_max_value()
        daysleft = poc_real_rate_line.daysleft(end=target_rate_line.domain.end)
        # normal case: when there is still work to do and there is time left
        # before the target deadline
        if (distance_to_max_value > 0) & (daysleft > 0):
            domain = DateDomain(
                begin=poc_real_rate_line.domain.end, end=target_rate_line.domain.end
            )
            holidays_in_date_range = self.count_holidays_in_date_range(
                self.holiday_periods, domain.domain
            )
            # Shrink the domain by the holidays; the required slope is spread
            # over the remaining working days only.
            domain = DateDomain(
                begin=poc_real_rate_line.domain.end,
                end=target_rate_line.domain.end - timedelta(holidays_in_date_range),
            )
            slope = distance_to_max_value / (daysleft - holidays_in_date_range)
            line = poc_real_rate_line.append(
                TimeseriesLine(data=slope, domain=domain), skip=1
            )
        # exception: when there is still work to do but the target deadline
        # has already passed
        elif (distance_to_max_value > 0) & (daysleft <= 0):
            slope = (
                distance_to_max_value / 7
            )  # past deadline, production needs to be finish within a week
            domain = DateDomain(
                begin=poc_real_rate_line.domain.end,
                end=pd.Timestamp.now() + timedelta(7),
            )
            line = poc_real_rate_line.append(
                TimeseriesLine(data=slope, domain=domain), skip=1
            )
        # no more work to do, so ideal line == realised line
        else:
            line = poc_real_rate_line
        holiday_periods = self.slice_holiday_periods(
            holiday_periods=self.holiday_periods,
            periods_to_remove=poc_real_rate_line.domain.domain,
        )
        line = self.add_holiday_periods_to_line(line, holiday_periods)
        line.name = "poc_ideal_indicator"
        line.max_value = self.phase_data["total_units"]
        return line

    def calculate_poc_verwacht_rate_line(self):
        """This function calculates the percentage of completion (poc) line
        given what has been realised so far and what is expected that will be
        done given past performance. This line is expressed in rate per day.

        The line is based on the poc real rate line and is extended with a
        daily rate that is based on the average performance during the last
        months. In the calculation of the expected daily rate also holiday
        periods with zero activity are taken into account.

        Returns:
            poc real rate line (object)
        """
        poc_real_rate_line = self.calculate_poc_real_rate_line()
        slope = poc_real_rate_line.integrate().extrapolate(data_partition=0.5).slope
        # when there not enough realised data pionts, we take the ideal speed
        # as slope
        if slope == 0:
            slope = self.phase_data["performance_norm_unit"]
        distance_to_max_value = poc_real_rate_line.distance_to_max_value()
        daysleft = poc_real_rate_line.daysleft(slope=slope)
        # if there is work to do we extend the pocreal line, if not ideal
        # line == realised line
        if distance_to_max_value > 0:
            domain = DateDomainRange(
                begin=poc_real_rate_line.domain.end, n_days=daysleft
            )
            line = poc_real_rate_line.append(
                TimeseriesLine(data=slope, domain=domain), skip=1
            )
        else:
            line = poc_real_rate_line
        holiday_periods = self.slice_holiday_periods(
            self.holiday_periods, poc_real_rate_line.domain.domain
        )
        line = self.add_holiday_periods_to_line(line, holiday_periods)
        line.name = "poc_verwacht_indicator"
        line.max_value = self.phase_data["total_units"]
        return line

    def calculate_work_stock_rate_line(self):
        """This function calculates the work stock line given the poc ideal
        rate line of the master phase that controls the work_stock for this
        phase. The work stock line is expressed in rate per day.

        Raises:
            ValueError: this function cannot be executed without the poc ideal
                rate line of the masterphase.

        Returns:
            work_stock rate line (object)
        """
        if not self.poc_ideal_rate_line_masterphase:
            # Message added so the failure is self-explanatory in logs.
            raise ValueError(
                "poc_ideal_rate_line_masterphase is required to calculate the work stock rate line"
            )
        ratio = self.phase_data["total_units"] / self.masterphase_data["total_units"]
        line = self.poc_ideal_rate_line_masterphase * ratio
        line.name = "work_stock_indicator"
        line.max_value = self.phase_data["total_units"]
        return line

    def calculate_work_stock_amount_line(self):
        """This function calculates the work stock amount line which specifies
        the total amount of work stock at a given day. This line is expressed
        in amount per day, not in rates per day.

        The work stock amount line is calculated by subtracting the integral
        of the poc ideal rate line from the integral of the work_stock rate
        line.

        Returns:
            work stock amount line (object)
        """
        line = (
            self.calculate_work_stock_rate_line().integrate()
            - self.calculate_poc_ideal_rate_line().integrate()
        )
        line.name = "work_stock_amount_indicator"
        line.max_value = self.phase_data["total_units"]
        return line

    def line_to_record(self, line: object):
        """This functions takes a line object and adds it as a record to the
        record list.

        Args:
            line (object)
        """
        # Amount lines are resampled by mean; rate lines use the default
        # record configuration and additionally write a raw "units" variant.
        if line.name == "work_stock_amount_indicator":
            self.record_list.append(
                LineRecord(
                    record=line,
                    collection="Lines",
                    graph_name=f"{line.name}",
                    phase=self.phase_data["name"],
                    client=self.client,
                    project=self.project,
                    resample_method="mean",
                )
            )
            self.record_list.append(
                LineRecord(
                    record=line,
                    collection="Lines",
                    graph_name=f"{line.name}_units",
                    phase=self.phase_data["name"],
                    client=self.client,
                    project=self.project,
                    resample_method="mean",
                )
            )
        else:
            self.record_list.append(
                LineRecord(
                    record=line,
                    collection="Lines",
                    graph_name=f"{line.name}",
                    phase=self.phase_data["name"],
                    client=self.client,
                    project=self.project,
                )
            )
            self.record_list.append(
                LineRecord(
                    record=line,
                    collection="Lines",
                    graph_name=f"{line.name}_units",
                    phase=self.phase_data["name"],
                    client=self.client,
                    project=self.project,
                    to_be_normalized=False,
                    percentage=False,
                )
            )

    def get_record(self):
        """Method to show records"""
        return self.record_list

    @staticmethod
    def slice_holiday_periods(holiday_periods, periods_to_remove):
        """
        Slice holiday periods to only contain relevant dates. Used to ensure
        some holidays are not counted doubly.

        Args:
            holiday_periods:
            periods_to_remove:

        Returns:
            A sliced set of holiday periods
        """
        new_holiday_periods = []
        for base_holiday_period in holiday_periods:
            min_date = base_holiday_period.min()
            max_date = base_holiday_period.max()
            remove_start = min_date in periods_to_remove
            remove_end = max_date in periods_to_remove
            if not remove_start and not remove_end:
                new_holiday_periods.append(base_holiday_period)
            elif remove_start and not remove_end:
                new_holiday_periods.append(
                    pd.date_range(
                        periods_to_remove.max() + timedelta(days=1),
                        base_holiday_period[-1],
                    )
                )
            elif not remove_start and remove_end:
                new_holiday_periods.append(
                    pd.date_range(
                        base_holiday_period[0],
                        periods_to_remove.min() + timedelta(days=-1),
                    )
                )
            # When both ends overlap, the holiday period is fully covered and
            # dropped entirely.
        return new_holiday_periods

    @staticmethod
    def count_holidays_in_date_range(holidays, date_range):
        """
        Counts the amount of holidays in a given date range

        Args:
            holidays: Set of date ranges which are considered holidays
            date_range: target range in which the holidays are counted

        Returns:
            The amount of holidays in the date range.
        """
        count = 0
        for holiday in holidays:
            count += len(set(holiday).intersection(set(date_range)))
        return count

    def add_holiday_periods_to_line(self, line, sorted_holiday_periods):
        """
        Function that will enhance a line to include input rest periods. The
        productivity during the rest period will be 0, and the line will be
        extended to keep the same total.

        Args:
            line:
            sorted_holiday_periods: rest periods, sorted to yield correct
                results.

        Returns:
            The line with holiday periods included.
        """
        holiday_periods = copy.deepcopy(
            sorted_holiday_periods
        )  # Retrieve full set of defined rest dates
        # Main loop.
        # You have to loop over the rest periods multiple times,
        # because you are extending the timeperiod in every loop
        while True:
            # Find all relevant rest periods, given current dates of the line
            (
                next_holiday_period,
                other_periods,
            ) = self._find_next_holiday_periods_in_date_range(
                line.make_series().index, holiday_periods
            )
            if not len(next_holiday_period):
                break  # Stop looping if there's no rest periods left to add
            # Remove rest periods that have been added from the set that can
            # still be added
            holiday_periods = other_periods
            # Add next relevant rest periods to the line, continue with the
            # new line
            line = self._add_holiday_period(line, next_holiday_period)
        return line

    def _add_holiday_period(self, line, holiday_period):
        """
        Helper function to add a single rest period to a TimeseriesLine

        Args:
            line:
            holiday_period:

        Returns:
            The line with the holiday period spliced in.
        """
        holiday_period_line = TimeseriesLine(
            domain=DateDomain(begin=holiday_period[0], end=holiday_period[-1]), data=0
        )
        before_line = line.slice(end=holiday_period.min())
        # Everything after the holiday shifts right by the holiday's length so
        # the total production stays the same.
        after_line = line.slice(begin=holiday_period.min()).translate_x(
            len(holiday_period)
        )
        return before_line.append(holiday_period_line, skip=1, skip_base=True).append(
            after_line
        )

    # Rest dates have to be sorted to yield correct results!!
    def _find_next_holiday_periods_in_date_range(self, date_range, holidays_period):
        """
        Helper function to find the next rest period in the given set of rest
        dates.

        Args:
            date_range:
            holidays_period:

        Returns:
            Tuple of (overlapping dates of the first matching period, the
            remaining periods).
        """
        overlapping_dates = []
        while len(holidays_period) > 0:
            dates = holidays_period.pop(0)
            overlapping_dates = self._find_overlapping_dates(date_range, dates)
            if overlapping_dates:
                overlapping_dates = pd.date_range(
                    start=overlapping_dates[0], end=overlapping_dates[-1], freq="D"
                )
                break
        return overlapping_dates, holidays_period

    def _find_overlapping_dates(self, base_period, holidays_period):
        """Return the dates of ``holidays_period`` that fall in ``base_period``.

        Fast path: when the holiday's first day is inside the base period the
        whole holiday is assumed to overlap.
        """
        if holidays_period.min() in base_period:
            overlapping_dates = holidays_period.to_list()
        else:
            overlapping_dates = [
                date for date in holidays_period if date in base_period
            ]
        return overlapping_dates

    def _remove_holiday_periods(self, holidays_period, to_remove):
        """Return ``holidays_period`` without the ``to_remove`` period.

        Raises:
            ValueError: when nothing was removed, which would otherwise cause
                an infinite loop at the call site.
        """
        # Bug fix: the original comprehension filtered on ``not to_remove`` —
        # a condition constant for the whole list (and one that raises for
        # multi-element DatetimeIndex objects). Identity comparison removes
        # exactly the given period.
        new_list = [x for x in holidays_period if x is not to_remove]
        if len(new_list) == len(holidays_period):
            raise ValueError(
                "Did not remove value from list, this would result in infinite loop"
            )
        return new_list