Example #1
import sys
import numpy as np
from rpctools.utils.params import DummyTbx

def assign_groessenklassen():
    '''assign the groessenklassen to the gemeinde table based on their
    number of inhabitants'''
    tbx = DummyTbx()
    workspace = 'FGDB_Basisdaten_deutschland.gdb'
    gr_df = tbx.table_to_dataframe('Gemeindegroessenklassen',
                                   workspace=workspace,
                                   is_base_table=True)
    # set NaN values to max integer (open upper bound of the highest
    # groessenklasse); sys.maxint is Python 2, use sys.maxsize in Python 3
    gr_df.loc[np.isnan(gr_df['bis']), 'bis'] = sys.maxint
    gem_columns = ['RS', 'Einwohner', 'groessenklasse',
                   'vwg_rs', 'vwg_groessenklasse']
    gem_table = tbx.table_to_dataframe('bkg_gemeinden', columns=gem_columns,
                                       workspace=workspace,
                                       is_base_table=True)
    summed = gem_table.groupby('vwg_rs').sum()['Einwohner'].reset_index()
    summed.rename(columns={'Einwohner': 'vwg_Einwohner'}, inplace=True)
    gem_table = gem_table.merge(summed, on='vwg_rs')

    for index, gem in gem_table.iterrows():
        for gr_col, ew_col in [('groessenklasse', 'Einwohner'),
                               ('vwg_groessenklasse', 'vwg_Einwohner')]:
            ew = gem[ew_col]
            higher = ew >= gr_df['von']
            lower = ew < gr_df['bis']
            # take the id where both borders match
            match = gr_df['groessenklasse'][np.logical_and(higher, lower)].values
            assert len(match) == 1
            gr_klasse = match[0]
            gem_table.loc[index, gr_col] = gr_klasse
    tbx.dataframe_to_table('bkg_gemeinden', gem_table, ['RS'],
                           workspace=workspace, is_base_table=True, 
                           upsert=False)
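
The lookup above matches an inhabitant count against half-open [von, bis) intervals. A minimal self-contained sketch of that matching with toy data (column names taken from the example, values invented):

import numpy as np
import pandas as pd

# toy Gemeindegroessenklassen table: half-open intervals [von, bis)
gr_df = pd.DataFrame({'groessenklasse': [1, 2, 3],
                      'von': [0, 2000, 5000],
                      'bis': [2000, 5000, np.nan]})  # open upper bound
gr_df.loc[np.isnan(gr_df['bis']), 'bis'] = np.iinfo(np.int64).max

ew = 3500  # inhabitants of one toy Gemeinde
match = gr_df['groessenklasse'][(ew >= gr_df['von']) & (ew < gr_df['bis'])]
assert len(match.values) == 1
print(match.values[0])  # -> 2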
Example #2
    def update_layersymbology(self, lyr, num_classes=13, column='weight'):
        """
        update the class break values of the layer symbology

        Parameters
        ----------
        lyr : arcpy layer
            layer whose symbology is updated
        num_classes : int
            number of classes of the new layer symbology
        column : str
            name of the column the class breaks are based on;
            the minimum and maximum class values are taken from its data
        """
        from rpctools.utils.params import DummyTbx
        data_source = lyr.dataSource
        # get new classes
        tbx = DummyTbx()
        data = tbx._query_table(data_source, columns=[column])
        min_val = int(min(data)[0])
        max_val = int(max(data)[0]) + 1
        new_classes = np.linspace(min_val, max_val, num=num_classes + 1)
        new_classes = np.round(new_classes).astype(int)
        labels = ['bis zu {}'.format(c) for c in new_classes[1:]]
        # update layer
        lyr.symbology.classBreakValues = new_classes
        lyr.symbology.classBreakLabels = labels
        lyr.symbology.reclassify()
        arcpy.RefreshTOC()
        arcpy.RefreshActiveView()
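
The break computation itself is plain numpy and can be checked without arcpy; a quick sketch with invented values:

import numpy as np

values = [3.2, 7.9, 41.0, 88.5]  # toy 'weight' column
min_val = int(min(values))       # 3
max_val = int(max(values)) + 1   # 89
num_classes = 4
new_classes = np.round(np.linspace(min_val, max_val,
                                   num=num_classes + 1)).astype(int)
labels = ['bis zu {}'.format(c) for c in new_classes[1:]]
print(new_classes)  # [ 3 24 46 68 89]
print(labels)       # ['bis zu 24', 'bis zu 46', 'bis zu 68', 'bis zu 89']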
Example #3
from datetime import date

import numpy as np
import pandas as pd

from rpctools.utils.params import DummyTbx

def next_working_day(min_days_infront=2):
    """
    get the next working day in germany (no holidays, no saturdays, no sundays
    in all federal states)
    reuqires the basetable Feriendichte to hold days infront of today
    (atm data incl. 2017 - 2020)

    Parameters
    ----------
    min_days_infront : int (default: 2)
       returned day will be at least n days infront

    Returns
    -------
    day : datetime.date
       the next day without holidays,
       if day is out of range of basetable: today + min_days_infront
    """
    tbx = DummyTbx()
    today = np.datetime64(date.today())
    day = today + np.timedelta64(min_days_infront,'D')
    # get working days (excl. holidays)
    where = ("Wochentag <> 'Samstag' and "
             "Wochentag <> 'Sonntag' and "
             "Anteil_Ferien_Bevoelkerung = 0")
    df_density = tbx.table_to_dataframe(
        'Feriendichte', workspace='FGDB_Basisdaten_deutschland.gdb',
        where=where, is_base_table=True)
    # DataFrame.sort was removed in newer pandas; sort_values is the successor
    df_density = df_density.sort_values('Datum')
    # can't compare directly because datetime64 has no length
    infront = np.where(df_density['Datum'] >= day)[0]
    if len(infront) > 0:
        # get the first day matching all conditions
        day = df_density.iloc[infront[0]]['Datum']
    return pd.Timestamp(day).date()
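
Stripped of the base table, the date handling is numpy datetime arithmetic plus a positional lookup; a self-contained sketch with an invented stand-in for the filtered Feriendichte frame:

from datetime import date
import numpy as np
import pandas as pd

today = np.datetime64(date.today())
day = today + np.timedelta64(2, 'D')  # earliest acceptable day
df = pd.DataFrame({'Datum': pd.date_range(today, periods=10)})
infront = np.where(df['Datum'] >= day)[0]
if len(infront) > 0:
    day = df.iloc[infront[0]]['Datum']  # first day matching all conditions
print(pd.Timestamp(day).date())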
Example #4
from rpctools.utils.params import DummyTbx

class Diagram(object):
    _workspace = None

    def __init__(self, **kwargs):
        """
        title : str
        """
        self.title = ''
        self.kwargs = kwargs
        self.tbx = DummyTbx()

    def create(self):
        '''
        create a plot

        the kwargs passed to __init__ may hold optional parameters
        the subclassing diagram needs (e.g. projectname)
        '''
        self.tbx._getParameterInfo()
        projectname = self.kwargs.get('projectname')
        self.tbx.set_active_project(projectname=projectname)
        if self._workspace:
            self.tbx.folders._workspace = self._workspace

    def show(self):
        pass
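
For context, a minimal sketch of how a subclass might hook into create(); the subclass name and plotting body are hypothetical, matplotlib is assumed as the plotting backend, and the rpctools environment must be importable:

import matplotlib.pyplot as plt

class ExampleDiagram(Diagram):  # hypothetical subclass
    _workspace = 'FGDB_Definition_Projekt.gdb'  # workspace name assumed

    def create(self):
        # resolves the active project and workspace via the base class
        super(ExampleDiagram, self).create()
        self.title = self.kwargs.get('title', '')
        # ... query tables via self.tbx and draw with matplotlib here ...

    def show(self):
        plt.show()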
Example #5
    def __init__(self, **kwargs):
        """
        title : str
        """
        self.title = ''
        self.kwargs = kwargs
        self.tbx = DummyTbx()
Example #6
from rpctools.utils.params import DummyTbx

def get_project_centroid(projectname):
    """get the centroid of the defined areas of the given project
    (projection is defined by project)"""
    tbx = DummyTbx()
    tbx.set_active_project(projectname)
    flaechen_df = tbx.table_to_dataframe(
        'Teilflaechen_Plangebiet',
        columns=['INSIDE_X', 'INSIDE_Y'],
        workspace='FGDB_Definition_Projekt.gdb')
    x = flaechen_df['INSIDE_X'].mean()
    y = flaechen_df['INSIDE_Y'].mean()
    return x, y
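
Usage is a single call; the project name here is a placeholder:

x, y = get_project_centroid('example_project')  # placeholder name
print('centroid: {:.1f}, {:.1f}'.format(x, y))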
Example #7
    def __init__(self,
                 df_distances,
                 df_markets,
                 df_zensus,
                 debug=False,
                 projectname=''):

        self.distances = df_distances
        self.markets = df_markets
        self.zensus = df_zensus
        self.tbx = DummyTbx(projectname=projectname)
        self.debug = debug
Example #8
import numpy as np

from rpctools.utils.params import DummyTbx

def get_extent(tablename, workspace, where=''):
    """
    get the extent of a table with optional where clause for the shapes
    """
    tbx = DummyTbx()
    tbx.set_active_project()
    xmin, xmax, ymin, ymax = [np.inf, -np.inf, np.inf, -np.inf]
    cursor = tbx.query_table(tablename, columns=['SHAPE@'],
                             workspace=workspace,
                             where=where)
    for row in cursor:
        shape = row[0]
        xmin = min(xmin, shape.extent.XMin)
        xmax = max(xmax, shape.extent.XMax)
        ymin = min(ymin, shape.extent.YMin)
        ymax = max(ymax, shape.extent.YMax)
    del cursor
    return xmin, ymin, xmax, ymax
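
For illustration, a usage sketch (the table and workspace names are borrowed from other snippets in this listing and may not match a real project):

xmin, ymin, xmax, ymax = get_extent('Teilflaechen_Plangebiet',
                                    'FGDB_Definition_Projekt.gdb')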
Example #9
from rpctools.utils.params import DummyTbx

def kostenaufteilung_startwerte(project):
    """
    Check if table Kostenaufteilung has data.
    If not: copy data from Kostenaufteilung_Startwerte

    Parameters
    ----------
    project : str
        name of the active project

    """
    table = 'Kostenaufteilung'
    tbx = DummyTbx()
    tbx.set_active_project(project)
    df_cost_allocation = tbx.table_to_dataframe(
        table, workspace='FGDB_Kosten.gdb')
    if len(df_cost_allocation) != 0:
        return
    df_cost_allocation_initial = tbx.table_to_dataframe(
        'Kostenaufteilung_Startwerte', columns=[],
        workspace='FGDB_Kosten_Tool.gdb', where=None, is_base_table=True)
    tbx.dataframe_to_table(table, df_cost_allocation_initial, pkeys=['OBJECTID'],
                           workspace='FGDB_Kosten.gdb', upsert=True)
Example #10
import arcpy
import numpy as np
import pandas as pd

from rpctools.utils.params import DummyTbx

class Sales(object):
    NULLFALL = 0
    PLANFALL = 1
    # time to separate same markets into 'Umfeld' and 'Abstand'
    # example: cut_off_time = 5 min
    #          nearest market = 4 min drive -> 'Umfeld'
    #          second market = 7 min drive -> 'Umfeld'
    #          third market = 12 min drive -> 'Abstand'
    # Note: switched from times to km -> cutoff is > 1 km
    relation_dist = 1  # km

    def __init__(self, df_distances, df_markets, df_zensus, debug=False,
                 projectname=''):

        self.distances = df_distances
        self.markets = df_markets
        self.zensus = df_zensus
        self.tbx = DummyTbx(projectname=projectname)
        self.debug = debug

    def calculate_nullfall(self):
        return self._calculate_sales(self.NULLFALL)

    def calculate_planfall(self):
        return self._calculate_sales(self.PLANFALL)

    def _calculate_sales(self, setting):
        df_markets = self._prepare_markets(self.markets, setting)
        df_markets.set_index('id', inplace=True)

        # drop rows with markets, that are not in the dataframe of markets
        # used for current settings
        # (e.g. planfall markets when current setting is nullfall)
        ids_not_in_df = np.setdiff1d(
            np.unique(self.distances['id_markt']), df_markets.index)
        distances = self.distances.drop(
            self.distances.index[np.in1d(self.distances['id_markt'],
                                         ids_not_in_df)])
        # calc with distances in kilometers
        distances['distanz'] /= 1000
        distances[distances < 0] = -1

        # in case of Nullfall take zensus points without planned areas
        if setting == self.NULLFALL:
            zensus = self.zensus[self.zensus['id_teilflaeche'] < 0]
        else:
            zensus = self.zensus

        df_kk = pd.DataFrame()
        df_kk['id_siedlungszelle'] = zensus['id']
        df_kk['kk'] = zensus['kk']
        kk_merged = distances.merge(df_kk, on='id_siedlungszelle')

        kk_matrix = kk_merged.pivot(index='id_markt',
                                    columns='id_siedlungszelle',
                                    values='kk')

        dist_matrix = kk_merged.pivot(index='id_markt',
                                      columns='id_siedlungszelle',
                                      values='distanz')
        dist_matrix = dist_matrix.fillna(0)

        n_cells = len(np.unique(distances['id_siedlungszelle']))
        attraction_matrix = pd.DataFrame(data=np.zeros(dist_matrix.shape),
                                         index=dist_matrix.index,
                                         columns=dist_matrix.columns)

        for index, market in df_markets.iterrows():
            dist = dist_matrix.loc[index]
            factor = market['exp_faktor']
            exponent = market['exponent']
            attraction_matrix.loc[index] = factor * np.exp(dist * exponent)

        unreachable = dist_matrix < 0
        attraction_matrix[unreachable] = 0
        betriebstyp_col = 'id_betriebstyp_nullfall' \
            if setting == self.NULLFALL else 'id_betriebstyp_planfall'
    
        masked_dist_matrix = dist_matrix.T
        masked_dist_matrix = masked_dist_matrix.mask(masked_dist_matrix < 0)
        
        # local providers
        # no real competition, but only the closest three per cell
        # (copied from calc_competitors, no time for a separate implementation)
        is_lp = df_markets[betriebstyp_col] == 1
        local_markets = df_markets[is_lp]
        local_masked_dist = masked_dist_matrix[local_markets.index]
        df_ranking = local_masked_dist.rank(axis=1, method='first')
        local_comp_matrix = pd.DataFrame(data=0, index=df_ranking.index,
                                         columns=df_ranking.columns)
        local_comp_matrix[df_ranking <= 3] = 1
        local_comp_matrix[np.isnan(df_ranking)] = 0
        local_comp_matrix = local_comp_matrix.T
        
        # small markets
        is_sm = df_markets[betriebstyp_col] == 2
        small_markets = df_markets[is_sm]
        small_comp_matrix = self.calc_competitors(
            masked_dist_matrix, small_markets)
        
        # big markets
        big_markets = df_markets[df_markets[betriebstyp_col] > 2]
        big_comp_matrix = self.calc_competitors(
            masked_dist_matrix, big_markets)
        
        # merge
        big_comp_matrix.loc[is_lp] = local_comp_matrix
        big_comp_matrix.loc[is_sm] = small_comp_matrix.loc[is_sm]
        
        competitor_matrix = big_comp_matrix
        competitor_matrix[dist_matrix < 0] = 0

        if self.debug:
            setting_str = 'Nullfall' if setting == self.NULLFALL else 'Planfall'
            arcpy.AddMessage('DEBUG: Schreibe Zwischenergebnisse')
            if setting == self.NULLFALL:
                self.write_intermediate_results(dist_matrix.transpose(),
                                                'Distanzmatrix')
            self.write_intermediate_results(
                attraction_matrix.transpose(),
                u'{}_erstes_Zwischenergebnis_KK_Anteile_Wahrsch'.format(setting_str))
            self.write_intermediate_results(
                competitor_matrix.transpose(),
                u'{}_zweites_Zwischenergebnis_Attraktivitaet'.format(setting_str))
            arcpy.AddMessage('DEBUG: Berechnung')

        # include competition between same market types in attraction_matrix
        attraction_matrix *= competitor_matrix.values

        probabilities = attraction_matrix / attraction_matrix.sum(axis=0)
        kk_flow = probabilities * kk_matrix
        kk_flow = kk_flow.fillna(0)

        if self.debug:
            arcpy.AddMessage('DEBUG: Schreibe weitere Zwischenergebnisse')
            self.write_intermediate_results(
                attraction_matrix.transpose(),
                u'{}_drittes_Zwischenergebnis_Verteilungsmassstab_erster_Schritt'.format(setting_str))
            self.write_intermediate_results(
                probabilities.transpose(),
                u'{}_viertes_Zwischenergebnis_Verteilungsmassstab_zweiter_Schritt'.format(setting_str))
            self.write_intermediate_results(
                kk_flow.transpose(),
                u'{}_fuenftes_Zwischenergebnis_Kaufkraftstroeme'.format(setting_str))

        return kk_flow

    def write_intermediate_results(self, dataframe, table):
        self.tbx.insert_dataframe_in_table(
            table, dataframe.reset_index(),
            workspace='FGDB_Standortkonkurrenz_Supermaerkte.gdb',
            create=True)

    def calc_competitors(self, masked_dist_matrix, df_markets):
        """
        account competition through other markets of the same brand
        """
        cutoff_dist = self.relation_dist
        results = pd.DataFrame(data=1., index=masked_dist_matrix.index,
                               columns=masked_dist_matrix.columns)
        competing_markets = df_markets[['id_kette']]
        for id_kette in np.unique(competing_markets['id_kette']):
            markets_of_same_type = \
                competing_markets[competing_markets['id_kette'] == id_kette]
            if len(markets_of_same_type['id_kette']) == 1 or id_kette == 0:
                continue
            indices = list(markets_of_same_type.index)
            same_type_dist_matrix = masked_dist_matrix[indices]
            df_ranking = same_type_dist_matrix.rank(axis=1, method='first')
            nearest_three_mask = df_ranking <= 3
            df_ranking = df_ranking.mask(~nearest_three_mask)
            cutoff_dist_matrix = same_type_dist_matrix.copy()
            cutoff_dist_matrix['Minimum'] = \
                cutoff_dist_matrix.loc[:, indices].min(axis=1)
            # differences between the way to the nearest market and the other
            # markets: set all distances relative to the nearest market
            cutoff_dist_matrix = cutoff_dist_matrix.sub(
                cutoff_dist_matrix['Minimum'], axis=0)
            del cutoff_dist_matrix['Minimum']
            cutoff_dist_matrix = cutoff_dist_matrix.mask(~nearest_three_mask)
            cutoff_dist_matrix = cutoff_dist_matrix.round(2)
            # '<= cutoff' done as two checks to be robust to float rounding
            is_near = np.logical_or(cutoff_dist_matrix < cutoff_dist,
                                    np.isclose(cutoff_dist_matrix, cutoff_dist))
            df_ranking['Umkreis'] = is_near.sum(axis=1)
            for market_id in indices:
                # note: is_near[market_id] indicates if the market
                #       is in 'Umkreis' (i.e. one of the nearest markets)
                near = is_near[market_id]
                # write data for near markets with:
                # -> 1 near market
                factor = df_markets.loc[market_id]['ein_Markt_in_Naehe']
                results.loc[near & (df_ranking['Umkreis'] == 1),
                            market_id] = factor
                # -> 2 near markets
                factor = df_markets.loc[market_id]['zwei_Maerkte_in_Naehe']
                results.loc[near & (df_ranking['Umkreis'] == 2),
                            market_id] = factor
                # -> more than 2 near markets
                factor = df_markets.loc[market_id]['drei_Maerkte_in_Naehe']
                results.loc[near & (df_ranking['Umkreis'] == 3),
                            market_id] = factor
                # write data for far markets with:
                # -> market is far; 1 near market exists;
                #    market is closer than possible other far markets
                factor = df_markets.loc[market_id][
                    'zweiter_Markt_mit_Abstand_zum_ersten']
                results.loc[~near & (df_ranking['Umkreis'] == 1) &
                            (df_ranking[market_id] == 2),
                            market_id] = factor
                # -> market is far; 1 near market exists;
                #    another far market exists that is closer to the cell
                factor = df_markets.loc[market_id][
                    'dritter_Markt_mit_Abstand_zum_ersten']
                results.loc[~near & (df_ranking['Umkreis'] == 1) &
                            (df_ranking[market_id] == 3),
                            market_id] = factor
                # -> market is far, 2 near markets
                factor = df_markets.loc[market_id][
                    'dritter_Markt_mit_Abstand_zum_ersten_und_zweiten']
                results.loc[~near & (df_ranking['Umkreis'] == 2),
                            market_id] = factor
            # if more than 3 markets: markets 4 to end set to 0
            # if market 3 and 4 have same distance: keep both
            results.loc[:, indices] = results.loc[:, indices].mask(
                ~nearest_three_mask, 0.)
        # Return results in shape of dist_matrix
        res = results.T
        return res

    def get_dist_matrix(self):
        # Dataframe for distances
        dist_matrix = self.distances.pivot(index='id_markt',
                                           columns='id_siedlungszelle',
                                           values='distanz')
        return dist_matrix

    def _prepare_markets(self, df_markets, setting):
        """
        setting - nullfall or planfall
        """
        base_ws = 'FGDB_Standortkonkurrenz_Supermaerkte_Tool.gdb'
        betriebstyp_col = 'id_betriebstyp_nullfall' \
            if setting == self.NULLFALL else 'id_betriebstyp_planfall'

        # ignore markets that don't exist yet or are already closed
        df_markets = df_markets[df_markets[betriebstyp_col] != 0]

        df_communities = self.tbx.table_to_dataframe(
            table_name='bkg_gemeinden',
            columns=['AGS', 'vwg_groessenklasse'],
            workspace='FGDB_Basisdaten_deutschland.gdb',
            is_base_table=True)

        # add groessenklassen to markets
        df_markets = df_markets.merge(df_communities, on='AGS')

        # dataframe for exponential parameters
        df_exponential_parameters = self.tbx.table_to_dataframe(
            table_name='Exponentialfaktoren',
            columns=['gem_groessenklasse', 'id_kette', 'id_betriebstyp',
                     'exponent', 'exp_faktor'],
            workspace=base_ws, is_base_table=True)

        df_attractivity_factors = self.tbx.table_to_dataframe(
            table_name='Attraktivitaetsfaktoren',
            workspace=base_ws, is_base_table=True)

        attractivity_cols = ['ein_Markt_in_Naehe', 'zwei_Maerkte_in_Naehe',
                             'drei_Maerkte_in_Naehe',
                             'zweiter_Markt_mit_Abstand_zum_ersten',
                             'dritter_Markt_mit_Abstand_zum_ersten',
                             'dritter_Markt_mit_Abstand_zum_ersten_und_zweiten']

        # add columns to markets
        df_markets['exponent'] = 0
        df_markets['exp_faktor'] = 0
        for col in attractivity_cols:
            df_markets[col] = 0

        # add the parameters to markets
        for index, market in df_markets.iterrows():
            gr_klasse = int(market['vwg_groessenklasse'])
            id_kette = market['id_kette']
            id_betriebstyp = market[betriebstyp_col]

            def get_entry_idx(df, id_kette, id_betriebstyp):
                '''look up for an entry in given df and return index
                (default if special one not found),
                scheme is the same for tables attractivity and exp. factors'''
                # look for entry of combination kette/betriebstyp
                idx = np.logical_and(
                    df['id_kette'] == id_kette,
                    df['id_betriebstyp'] == id_betriebstyp)
                # take the default entry for kette if combination is not found
                if idx.sum() == 0:
                    idx = np.logical_and(
                        df['id_kette'] == 0,
                        df['id_betriebstyp'] == id_betriebstyp)
                return idx

            # exp. factors
            df_exp_gr_klasse = df_exponential_parameters[
                df_exponential_parameters['gem_groessenklasse'] == gr_klasse]
            idx = get_entry_idx(df_exp_gr_klasse, id_kette, id_betriebstyp)
            entry = df_exp_gr_klasse[idx]
            df_markets.loc[index, 'exponent'] = entry['exponent'].values[0]
            df_markets.loc[index, 'exp_faktor'] = entry['exp_faktor'].values[0]

            # attractivity
            idx = get_entry_idx(df_attractivity_factors,
                                id_kette, id_betriebstyp)
            entry = df_attractivity_factors[idx]
            for col in attractivity_cols:
                df_markets.loc[index, col] = entry[col].values[0]


        # adapt column names of df_exponential_parameters to df_markets
        # for merge
        df_exponential_parameters.columns = ['groessenklasse', 'id_kette',
                                             'id_betriebstyp_nullfall',
                                             'exponent', 'exp_faktor']

        return df_markets
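
Stripped of the competition handling, the allocation in _calculate_sales is a gravity (Huff-type) model: per-market attraction exp_faktor * exp(distance * exponent), normalized per cell, then applied to the cell's purchasing power. A self-contained sketch with invented numbers:

import numpy as np
import pandas as pd

# toy distances in km (rows: markets, columns: cells)
dist_matrix = pd.DataFrame([[1.0, 4.0], [2.0, 1.5]],
                           index=['markt_1', 'markt_2'],
                           columns=['zelle_1', 'zelle_2'])
exp_faktor = pd.Series([1.0, 1.2], index=dist_matrix.index)
exponent = pd.Series([-0.3, -0.3], index=dist_matrix.index)

attraction = np.exp(dist_matrix.mul(exponent, axis=0)).mul(exp_faktor, axis=0)
probabilities = attraction / attraction.sum(axis=0)  # per-cell market shares
kk = pd.Series([100.0, 50.0], index=dist_matrix.columns)  # purchasing power
kk_flow = probabilities * kk  # purchasing power flowing to each market
print(kk_flow.round(1))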
Example #11
import time

from rpctools.utils.params import DummyTbx
# round_df_to is a project helper; its import is not shown in the original

def kostenkennwerte(project):
    """
    Check if Kostenkennwerte_Linienelemente has data.
    If not: copy from Netze_und_Netzelemente (only rows with Typ = 'Linie')
    and multiply by the interest/time factor and the regional factor

    Parameters
    ----------
    project : str
        name of the active project

    """
    table = 'Kostenkennwerte_Linienelemente'
    workspace_tool = 'FGDB_Kosten_Tool.gdb'
    tbx = DummyTbx()
    tbx.set_active_project(project)
    # check if table Kostenkennwerte_Linienelemente contains content
    df_costs_line_elements = tbx.table_to_dataframe(
        table, workspace='FGDB_Kosten.gdb')
    if len(df_costs_line_elements) != 0:
        return
    # calculate time factor
    current_year = int(time.strftime("%Y"))
    df_frame_data = tbx.table_to_dataframe('Rahmendaten',
                                           workspace=workspace_tool,
                                           is_base_table=True)
    interest = df_frame_data['Zins']
    reference_year = df_frame_data['Stand_Kostenkennwerte']
    time_factor = (1 + interest) ** (current_year - reference_year)
    # get regional factor
    ags = tbx.query_table('Projektrahmendaten',
                          workspace='FGDB_Definition_Projekt.gdb',
                          columns=['AGS'])[0][0]
    regional_factor = tbx.table_to_dataframe(
        'bkg_gemeinden', workspace='FGDB_Basisdaten_deutschland.gdb',
        columns=['BKI_Regionalfaktor'], where="AGS='{}'".format(str(ags)),
        is_base_table=True)
    # fill table Kostenkennwerte_Linienelemente
    regional_time_factor = time_factor * \
        regional_factor.loc[:, 'BKI_Regionalfaktor']
    rounding_factor = 5
    df_networks = tbx.table_to_dataframe('Netze_und_Netzelemente',
                                         workspace='FGDB_Kosten_Tool.gdb',
                                         where="Typ='{}'".format('Linie'),
                                         is_base_table=True)
    # multiply with factors
    df_networks.loc[:, ['Euro_EH', 'Cent_BU', 'Euro_EN']] *= \
        regional_time_factor[0]
    # round to 5
    df_networks.loc[:, ['Euro_EH', 'Cent_BU', 'Euro_EN']] = \
        round_df_to(df_networks.loc[:, ['Euro_EH', 'Cent_BU', 'Euro_EN']],
                    rounding_factor)

    tbx.dataframe_to_table(table, df_networks,
                           pkeys=['ID'], workspace='FGDB_Kosten.gdb',
                           upsert=True)
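
The interest adjustment is plain compounding, (1 + Zins) ** (current_year - Stand_Kostenkennwerte); a quick check with invented numbers:

interest = 0.02        # toy 'Zins'
reference_year = 2015  # toy 'Stand_Kostenkennwerte'
current_year = 2018
time_factor = (1 + interest) ** (current_year - reference_year)
print(round(time_factor, 4))  # 1.0612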