Example #1
0
 def __init__(self, service, name='', position=geopy.Point(45.4641, 9.1919),
              ageDiffusionIn={}, scaleIn=1, attributesIn={}):
     """Build a service unit placed at a given position.

     Args:
         service: ServiceType member provided by this unit.
         name: label of the unit (string).
         position: geopy.Point where the unit is located.
         ageDiffusionIn: dict whose keys are AgeGroup members and whose
             values are multiplied by the scale to obtain the kernel
             length scale. Passing None draws a randomly perturbed
             value for every AgeGroup.
         scaleIn: positive scalar multiplier for the kernel length scale.
         attributesIn: dict of extra attributes stored on the unit.

     Raises:
         AssertionError: if any argument fails the checks below.
     """
     assert isinstance(position, geopy.Point), 'Position must be a geopy Point'
     assert isinstance(service, ServiceType), 'Service must belong to the Eum'
     assert isinstance(name, str), 'Name must be a string'
     # `and` short-circuits: a non-scalar never reaches the comparison
     assert np.isscalar(scaleIn) and scaleIn > 0, 'Scale must be a positive scalar'
     assert isinstance(attributesIn, dict), 'Attributes can be provided in a dict'

     self.name = name
     self.service = service

     # A ServiceType can have many sites, so each unit has its own.
     # Moreover, a site is not uniquely assigned to a service
     self.site = position

     self.scale = scaleIn  # store scale info
     # copy so that the shared default dict (or the caller's dict) is
     # never aliased by several units
     self.attributes = dict(attributesIn)

     # how the service availability area varies for different age groups
     if ageDiffusionIn is None:
         # NOTE(review): these values already include self.scale and the
         # kernel below multiplies by self.scale again - confirm intended
         self.ageDiffusion = {
             g: (1 + .005*np.round(np.random.normal(), 2))*self.scale
             for g in AgeGroup.all()}
     else:
         assert set(ageDiffusionIn.keys()) <= set(AgeGroup.all()), 'Diffusion keys should be AgeGroups'
         # copy for the same aliasing reason as the attributes
         self.ageDiffusion = dict(ageDiffusionIn)

     # define kernel taking scale into account
     self.kernel = {g: gaussKern(length_scale=l*self.scale)
                    for g, l in self.ageDiffusion.items()}
Example #2
0
 def __init__(self, mappedPositions):
     """Create one zero-filled frame per ServiceType.

     Each frame is indexed like ``mappedPositions`` and has one column
     per AgeGroup. The input frame is kept in ``self.mappedPositions``.
     """
     assert isinstance(mappedPositions, MappedPositionsFrame), 'Expected MappedPositionsFrame'
     self.mappedPositions = mappedPositions

     ageGroups = AgeGroup.all()
     emptyShape = [mappedPositions.shape[0], len(ageGroups)]

     # build the per-service frames first, then hand them to the parent
     framesByService = {}
     for service in ServiceType:
         framesByService[service] = pd.DataFrame(
             np.zeros(emptyShape),
             index=mappedPositions.index,
             columns=ageGroups)
     super().__init__(framesByService)
Example #3
0
 def compute_kpi_for_localized_services(self):
     """Average service values by quartiere, weighted by population.

     For every service in ``self.serviceValues`` the per-position values
     are multiplied by the matching column of ``self.agesFrame``, summed
     by quartiere and divided by ``self.agesTotals``. Age groups outside
     ``service.demandAges`` and groups with a zero total are set to NaN.

     Returns:
         ``self.quartiereKPI``: dict mapping each service to the frame of
         weighted means, with columns reindexed to ``AgeGroup.all()``.

     Raises:
         AssertionError: if ``self.bEvaluated`` is falsy, or if a
             weighted mean falls outside the [min, max] of its quartiere.
     """
     assert self.bEvaluated, 'Have you evaluated service values before making averages for KPIs?'
     quartiereCol = common_cfg.IdQuartiereColName
     tolerance = np.finfo(float).eps

     # get mean service levels by quartiere, weighting according to the number of citizens
     for service, data in self.serviceValues.items():
         for col in self.agesFrame.columns:  # iterate over columns as Enums are not orderable...
             if col in service.demandAges:
                 self.weightedValues[service][col] = data[col].multiply(
                     self.agesFrame[col])
             else:
                 self.weightedValues[service][col] = np.nan*data[col]

         # bounds for the sanity check below, widened by machine epsilon;
         # the grouping is built once and reused for min and max
         byQuartiere = data.groupby(quartiereCol)
         checkRange = (byQuartiere.min() - tolerance,
                       byQuartiere.max() + tolerance)

         # sum weighted fractions by neighbourhood
         weightedSums = self.weightedValues[service].groupby(quartiereCol).sum()
         # set to NaN value the AgeGroups that have no people or there is no demand for the service
         weightedSums[self.agesTotals == 0] = np.nan
         weightedSums.iloc[:, ~weightedSums.columns.isin(service.demandAges)] = np.nan

         self.quartiereKPI[service] = (weightedSums/self.agesTotals).reindex(
             columns=AgeGroup.all(), copy=False)

         # check that the weighted mean lies between min and max in the neighbourhood
         for col in self.quartiereKPI[service].columns:
             kpiColumn = self.quartiereKPI[service][col]
             bGood = (kpiColumn.between(checkRange[0][col], checkRange[1][col])
                      | kpiColumn.isnull())
             assert all(bGood), 'Unexpected error in mean computation'

     return self.quartiereKPI
Example #4
0
    def __init__(self, dfIn, bDuplicatesCheck=True):
        """Wrap a DataFrame and add age-group totals and positions.

        Adds a 'PeopleTot' column, one column per AgeGroup and a column
        of geopy points built from the centroid of each 'geometry' entry.

        Args:
            dfIn: source pandas DataFrame; its state is copied into self.
            bDuplicatesCheck: when true, assert that no position repeats.
        """
        assert isinstance(dfIn, pd.DataFrame), 'Input DataFrame expected'
        # take over the state of a copy of the input frame
        super().__init__()
        self.__dict__.update(dfIn.copy().__dict__)

        # count people per AgeGroup: relabel the sample-age columns with
        # their AgeGroup, then sum the columns that share a label
        ageGroupLabel = 'ageGroup'
        sampleAgeCounts = common_cfg.fill_sample_ages_in_cpa_columns(self)
        relabelled = sampleAgeCounts.rename(AgeGroup.find_AgeGroup, axis='columns')
        countsByGroup = relabelled.T
        countsByGroup.index.name = ageGroupLabel
        # move the labels to a column so they can be made categorical
        countsByGroup = countsByGroup.reset_index()
        countsByGroup[ageGroupLabel] = countsByGroup[ageGroupLabel].astype('category')
        sectionAges = countsByGroup.groupby(ageGroupLabel).sum().T

        self['PeopleTot'] = sectionAges.sum(axis=1)
        # every AgeGroup gets a column, zero-filled when it has no data
        for ageGroup in AgeGroup.all():
            self[ageGroup] = sectionAges.get(
                ageGroup, np.zeros_like(self.iloc[:, 0]))

        # use the centroid of each geometry as position (latitude first)
        def centroid_to_point(shape):
            return geopy.Point(shape.centroid.y, shape.centroid.x)

        self[common_cfg.positionsCol] = self['geometry'].apply(centroid_to_point)

        if not bDuplicatesCheck:
            return
        # check no location is repeated - takes a while
        assert not any(self[common_cfg.positionsCol].duplicated()), 'Repeated position found'
Example #5
0
    def __init__(self, df_input, b_duplicates_check=True):
        """Wrap a DataFrame and add age totals, coordinates and positions.

        Adds a 'PeopleTot' column, one column per entry of
        ``self.OUTPUT_AGES``, the centroid coordinates of each 'geometry'
        entry and a column of geopy points. Also caches ``ages_frame``
        and ``mapped_positions`` on the instance.

        Args:
            df_input: source pandas DataFrame; its state is copied.
            b_duplicates_check: when true, assert no position repeats.

        Raises:
            AssertionError: if ``df_input`` is not a DataFrame or a
                duplicated position is found while checking.
        """
        assert isinstance(df_input, pd.DataFrame), 'Input DataFrame expected'

        # initialise and assign base DataFrame properties
        # FIXME: this is not nice at all. Refactor to properly inherit from df
        super().__init__()
        self.__dict__.update(df_input.copy().__dict__)

        # prepare the AgeGroups cardinalities
        groups_col = 'ageGroup'
        people_by_sample_age = common_cfg.fill_sample_ages_in_cpa_columns(self)
        data_by_group = people_by_sample_age.rename(AgeGroup.find_age_group,
                                                    axis='columns').T
        # index is now given by AgeGroup items
        data_by_group.index.name = groups_col
        # extract to convert to categorical and groupby
        data_by_group = data_by_group.reset_index()
        data_by_group[groups_col] = \
            data_by_group[groups_col].astype('category')
        ages_by_section = data_by_group.groupby(groups_col).sum().T
        self['PeopleTot'] = ages_by_section.sum(axis=1)
        # report all ages, zero-filled when a group has no data
        for col in self.OUTPUT_AGES:
            self[col] = ages_by_section.get(
                col, np.zeros_like(self.iloc[:, 0]))

        # extract long and lat and build geopy locations
        self[common_cfg.coord_col_names[0]] = self['geometry'].apply(
            lambda pos: pos.centroid.x)

        self[common_cfg.coord_col_names[1]] = self['geometry'].apply(
            lambda pos: pos.centroid.y)

        # `.values` replaces DataFrame.as_matrix(), removed in pandas 1.0.
        # Columns are reversed so each row reads (y, x); geopy.Point
        # unpacks a single sequence argument into its coordinates.
        yx_rows = self[common_cfg.coord_col_names[::-1]].values
        self[common_cfg.positions_col] = [geopy.Point(yx) for yx in yx_rows]

        if b_duplicates_check:
            # check no location is repeated - takes a while
            assert not any(self[common_cfg.positions_col].duplicated()),\
                'Repeated position found'

        # cache ages frame and mapped positions for quicker access
        age_multi_index = [
            self[common_cfg.id_quartiere_col_name],
            self[common_cfg.positions_col].apply(tuple)
        ]
        self.ages_frame = self[AgeGroup.all()].set_index(age_multi_index)

        self.mapped_positions = MappedPositionsFrame(
            long=self[common_cfg.coord_col_names[0]],
            lat=self[common_cfg.coord_col_names[1]],
            geopy_pos=self[common_cfg.positions_col].tolist(),
            id_quartiere=self[common_cfg.id_quartiere_col_name].tolist())
Example #6
0
    def compute_kpi_for_istat_values(self):
        """Compute ISTAT KPIs and vitality from quartiere-level sums.

        The demand frame is summed by quartiere; AgeGroup columns and the
        configured excluded columns are dropped when present. The result
        feeds the two ``istat_kpi`` helpers.

        Returns:
            Tuple ``(self.istatKPI, self.istatVitality)``.
        """
        quartiereTotals = self.demand.groupby(
            common_cfg.IdQuartiereColName).sum()

        # only drop the columns that actually exist in the aggregate
        toDrop = []
        for column in AgeGroup.all() + common_cfg.excludedColumns:
            if column in quartiereTotals.columns:
                toDrop.append(column)
        istatInput = quartiereTotals.drop(toDrop, axis=1)

        self.istatKPI = istat_kpi.wrangle_istat_cpa2011(istatInput, self.city)
        self.istatVitality = istat_kpi.compute_vitality_cpa2011(istatInput)

        return self.istatKPI, self.istatVitality
Example #7
0
    def load(self, meanRadius):
        """Build one ServiceUnit per library row returned by the parent.

        Rows are processed by library type ('tipologia-funzionale'); each
        type maps to the age-diffusion dict passed to its units, and the
        type is stored in the unit attributes under 'level'.

        Args:
            meanRadius: must be truthy.
                NOTE(review): it is validated but not forwarded to
                ServiceUnit (no scaleIn is passed) - confirm intended.

        Returns:
            List of ServiceUnit objects, grouped by library type.

        Raises:
            AssertionError: if ``meanRadius`` is falsy or the data holds
                a library type missing from the mapping below.
        """
        assert meanRadius, 'Please provide a reference radius for the mean library size'
        (propertData, locations) = super().extract_locations()

        nameCol = 'denominazioni.ufficiale'
        typeCol = 'tipologia-funzionale'

        # Edit here to specify which age groups each library type serves
        typeAgeDict = {'Specializzata': {group:1 for group in AgeGroup.all()},
                      'Importante non specializzata': {group:1 for group in AgeGroup.all()},
                      'Pubblica': {group:1 for group in AgeGroup.all()},
                      'NON SPECIFICATA': {AgeGroup.ChildPrimary:1},
                      'Scolastica': {AgeGroup.ChildPrimary:1},
                      'Istituto di insegnamento superiore': {AgeGroup.ChildPrimary:1},
                      'Nazionale': {AgeGroup.ChildPrimary:1},}

        libraryTypes = propertData[typeCol].unique()
        assert set(libraryTypes) <= set(typeAgeDict.keys()), 'Unrecognized types in input'

        unitList = []

        for libType in libraryTypes:
            # plain boolean array: the mask is paired with `locations` by
            # position, so it must not depend on the frame's index labels
            bThisGroup = (propertData[typeCol] == libType).values
            typeData = propertData[bThisGroup]
            typeLocations = [loc for loc, bKeep in zip(locations, bThisGroup)
                             if bKeep]

            for iUnit in range(typeData.shape[0]):
                rowData = typeData.iloc[iUnit, :]
                attrDict = {'level': libType}
                thisUnit = ServiceUnit(self.servicetype,
                        name=rowData[nameCol],
                        position=typeLocations[iUnit],
                        ageDiffusionIn=typeAgeDict[libType],
                        attributesIn=attrDict)
                unitList.append(thisUnit)

        return unitList
Example #8
0
 def evaluate_services_at(self, mappedPositions):
     """Evaluate every output service at the given positions.

     For each service type in ``self.outputServices`` that has at least
     one unit, every unit is evaluated per AgeGroup, the results are
     stacked along a new last axis and reduced with the service type's
     ``aggregate_units``.

     Returns:
         ServiceValues built on ``mappedPositions``; services without
         units keep their initial content.
     """
     assert isinstance(mappedPositions, MappedPositionsFrame), 'Expected MappedPositionsFrame'
     # all age groups are produced by default
     ageGroups = AgeGroup.all()
     results = ServiceValues(mappedPositions)

     for serviceType in self.outputServices:
         matchingUnits = [unit for unit in self.units
                          if unit.service == serviceType]
         if not matchingUnits:
             continue
         for ageGroup in ageGroups:
             perUnit = [unit.evaluate(results.positions, ageGroup)
                        for unit in matchingUnits]
             stacked = np.stack(perUnit, axis=-1)
             # aggregate unit contributions according to the service type norm
             results[serviceType][ageGroup] = serviceType.aggregate_units(stacked)
     return results
Example #9
0
    def load(self, meanRadius):
        """Build one ServiceUnit per pharmacy row returned by the parent.

        Args:
            meanRadius: truthy reference radius, passed to every unit as
                ``scaleIn``.

        Returns:
            List of ServiceUnit objects in row order.

        Raises:
            AssertionError: if ``meanRadius`` is falsy.
        """
        # message fixed: this loader reads pharmacy columns, not urban green
        assert meanRadius, 'Please provide a reference radius for pharmacies'
        (propertData, locations) = super().extract_locations()

        nameCol = 'CODICEIDENTIFICATIVOFARMACIA'
        colAttributes = {'Descrizione': 'DESCRIZIONEFARMACIA', 'PartitaIva': 'PARTITAIVA'}

        unitList = []
        for iUnit in range(propertData.shape[0]):
            rowData = propertData.iloc[iUnit, :]
            attrDict = {name: rowData[col] for name, col in colAttributes.items()}
            thisUnit = ServiceUnit(self.servicetype,
                                   # str() also handles plain Python values,
                                   # which have no .astype method
                                   name=str(rowData[nameCol]),
                                   position=locations[iUnit],
                                   ageDiffusionIn={g: 1 for g in AgeGroup.all()},
                                   scaleIn=meanRadius,
                                   attributesIn=attrDict)
            unitList.append(thisUnit)

        return unitList
Example #10
0
    def make_serviceareas_output(self, precision=4):
        """Assemble the per-layer output as lists of record dicts.

        Args:
            precision: number of decimals kept when rounding frames.

        Returns:
            dict with one entry for the ISTAT layer, one for the vitality
            layer and one per area in ``self.areas_tree`` (keyed by
            ``area.value``). Each value is a list of records; in area
            records every service name maps to the list of its values
            across ``AgeGroup.all()``.
        """
        out = dict()

        # tool to format frame data that does not depend on age
        def prepare_frame_data(frame_in):
            frame_in = frame_in.round(precision)
            orig_type = frame_in.index.dtype.type
            data_dict = frame_in.reset_index().to_dict(orient='records')
            # restore type as pandas has a bug and casts to float if int
            for quartiere_data in data_dict:
                old_value = quartiere_data[common_cfg.id_quartiere_col_name]
                if orig_type in (np.int32, np.int64, int):
                    quartiere_data[common_cfg.id_quartiere_col_name] = int(
                        old_value)

            return data_dict

        # make istat layer
        out[common_cfg.istat_layer_name] = prepare_frame_data(self.istat_data)

        # make vitality layer
        out[common_cfg.vitality_layer_name] = prepare_frame_data(
            self.vitality_data)

        # make layers
        for area, layers in self.areas_tree.items():
            layer_list = []
            for service in layers:
                data = self.layers_data[service].round(precision)
                # `.values` replaces DataFrame.as_matrix(), removed in
                # pandas 1.0; each row becomes one list of age values
                age_values = data[AgeGroup.all()].values.tolist()
                layer_list.append(
                    pd.Series(age_values,
                              index=data.index,
                              name=service.name))
            area_data = pd.concat(layer_list, axis=1).reset_index()
            out[area.value] = area_data.to_dict(orient='records')

        return out
Example #11
0
    def __init__(self, demandFrame, serviceUnits, cityName):
        """Set up the calculator for one city.

        Args:
            demandFrame: DemandFrame with the demand data.
            serviceUnits: iterable of ServiceUnit objects.
            cityName: name listed in ``city_settings.cityNamesList``.

        Raises:
            AssertionError: if any argument fails the checks below.
        """
        assert cityName in city_settings.cityNamesList, 'Unrecognized city name %s' % cityName
        assert isinstance(demandFrame, DemandFrame),'Demand frame expected'
        assert all(isinstance(unit, ServiceUnit) for unit in serviceUnits),'Service units list expected'

        # inputs
        self.city = cityName
        self.demand = demandFrame
        self.sources = serviceUnits

        # the evaluator is built on the units and exposes their positions
        self.evaluator = ServiceEvaluator(serviceUnits)
        self.servicePositions = self.evaluator.servicePositions

        # output containers, filled by later computations
        self.serviceValues = ServiceValues(self.demand.mappedPositions)
        self.bEvaluated = False
        self.weightedValues = ServiceValues(self.demand.mappedPositions)
        self.quartiereKPI = {}
        self.istatKPI = pd.DataFrame()

        # ages frame indexed by (quartiere id, position tuple), plus the
        # totals per first index level
        quartiereIds = demandFrame[common_cfg.IdQuartiereColName]
        positionTuples = demandFrame[common_cfg.positionsCol].apply(tuple)
        agesOnly = demandFrame[AgeGroup.all()]
        self.agesFrame = agesOnly.set_index([quartiereIds, positionTuples])
        self.agesTotals = self.agesFrame.groupby(level=0).sum()
Example #12
0
    def load(self, mean_radius=None):
        """Build one ServiceUnit per pharmacy row returned by the parent.

        The kernel thresholds of the first unit are reused for the
        following ones, since every unit gets the same scale and age
        diffusion.

        Args:
            mean_radius: truthy reference radius, used as unit scale.

        Returns:
            List of ServiceUnit objects in row order.

        Raises:
            AssertionError: if ``mean_radius`` is falsy.
        """
        assert mean_radius, 'Please provide a reference radius for pharmacies'
        (propert_data, locations) = super().extract_locations()

        col_attributes = {
            'Descrizione': 'DESCRIZIONEFARMACIA',
            'PartitaIva': 'PARTITAIVA'
        }

        unit_list = []
        # We assume all pharmacies share the same scale, so only one
        # threshold is necessary
        cached_thresholds = None
        for i_unit in range(propert_data.shape[0]):
            row_data = propert_data.iloc[i_unit, :]
            attr_dict = {
                name: row_data[col]
                for name, col in col_attributes.items()
            }
            this_unit = ServiceUnit(
                self.servicetype,
                # str() also handles plain Python values, which have no
                # .astype method
                name=str(row_data[self.name_col]),
                unit_id=row_data[self.id_col],
                position=locations[i_unit],
                scale=mean_radius,
                age_diffusion={g: 1
                               for g in AgeGroup.all()},
                kernel_thresholds=cached_thresholds,
                attributes=attr_dict)

            unit_list.append(this_unit)
            # if there were no thresholds, cache the computed ones
            if not cached_thresholds:
                cached_thresholds = this_unit.ker_thresholds

        return unit_list
Example #13
0
    def __init__(self,
                 service,
                 name,
                 unit_id,
                 position,
                 scale,
                 age_diffusion=None,
                 kernel_thresholds=None,
                 attributes=None):
        """Build a service unit placed at a given position.

        Args:
            service: ServiceType member provided by this unit.
            name: label of the unit (string).
            unit_id: identifier stored as ``self.id``.
            position: geopy.Point where the unit is located.
            scale: positive scalar multiplier for the kernel length scale.
            age_diffusion: dict whose keys are AgeGroup members and whose
                values are multiplied by ``scale`` to get the kernel
                length scale. Required despite the None default.
            kernel_thresholds: optional dict of positive thresholds
                covering every key of ``age_diffusion``; when falsy the
                thresholds are computed by the unit itself.
            attributes: optional dict of extra attributes.

        Raises:
            AssertionError: if any argument fails the checks below.
        """
        assert isinstance(position,
                          geopy.Point), 'Position must be a geopy Point'
        assert isinstance(service,
                          ServiceType), 'Service must belong to the Eum'
        assert isinstance(name, str), 'Name must be a string'
        # `and` short-circuits: a non-scalar never reaches the comparison
        assert np.isscalar(scale) and scale > 0, \
            'Scale must be a positive scalar'
        # fail with a clear message instead of an AttributeError on None
        assert age_diffusion is not None, \
            'Age diffusion must be provided as a dict keyed by AgeGroup'
        assert set(age_diffusion.keys()) <= set(
            AgeGroup.all()), 'Diffusion keys should be AgeGroups'
        if not attributes:
            attributes = {}
        assert isinstance(attributes,
                          dict), 'Attributes have to be provided in a dict'
        if kernel_thresholds:
            assert set(kernel_thresholds.keys()) >= set(age_diffusion.keys()),\
                'Kernel thresholds if provided must' \
                ' be defined for every age diffusion key'
        b_thresholds_input = bool(kernel_thresholds)

        self.name = name
        self.id = unit_id
        self.service = service

        # A ServiceType can have many sites, so each unit has its own.
        # Moreover, a site is not uniquely assigned to a service
        self.site = position
        self.coord_tuple = (position.latitude, position.longitude)

        self.scale = scale  # store scale info
        self.attributes = attributes  # dictionary

        # how the service availability area varies for different age groups
        self.age_diffusion = age_diffusion

        # define kernel taking scale into account
        self.kernel = {
            g: gaussKern(length_scale=l * self.scale)
            for g, l in self.age_diffusion.items()
        }

        # precompute kernel threshold per AgeGroup
        # initialise to infinity (np.inf: the np.Inf alias was removed
        # in NumPy 2.0)
        self.ker_thresholds = {g: np.inf for g in AgeGroup.all()}
        if b_thresholds_input:
            assert all(isinstance(kern, gaussKern)
                       for kern in self.kernel.values()),\
                'Unexpected kernel type in ServiceUnit'
            assert all(val > 0 for val in kernel_thresholds.values()), \
                'Thresholds must be positive'
            self.ker_thresholds.update(kernel_thresholds)
        else:
            self._compute_kernel_thresholds()

        # initialise attendance
        self.attendance = np.nan
Example #14
0
class DemandFrame(pd.DataFrame):
    """A class to store demand units in row and
    make them available for aggregation"""

    # age groups reported as columns by the constructor
    OUTPUT_AGES = AgeGroup.all()
    # custom attributes registered through pandas' `_metadata` hook
    _metadata = ['ages_frame', 'mapped_positions']

    def __init__(self, df_input, b_duplicates_check=True):
        """Wrap a DataFrame and add age totals, coordinates and positions.

        Adds a 'PeopleTot' column, one column per entry of
        ``OUTPUT_AGES``, the centroid coordinates of each 'geometry'
        entry and a column of geopy points. Also caches ``ages_frame``
        and ``mapped_positions`` on the instance.

        Args:
            df_input: source pandas DataFrame; its state is copied.
            b_duplicates_check: when true, assert no position repeats.

        Raises:
            AssertionError: if ``df_input`` is not a DataFrame or a
                duplicated position is found while checking.
        """
        assert isinstance(df_input, pd.DataFrame), 'Input DataFrame expected'

        # initialise and assign base DataFrame properties
        # FIXME: this is not nice at all. Refactor to properly inherit from df
        super().__init__()
        self.__dict__.update(df_input.copy().__dict__)

        # prepare the AgeGroups cardinalities
        groups_col = 'ageGroup'
        people_by_sample_age = common_cfg.fill_sample_ages_in_cpa_columns(self)
        data_by_group = people_by_sample_age.rename(AgeGroup.find_age_group,
                                                    axis='columns').T
        # index is now given by AgeGroup items
        data_by_group.index.name = groups_col
        # extract to convert to categorical and groupby
        data_by_group = data_by_group.reset_index()
        data_by_group[groups_col] = \
            data_by_group[groups_col].astype('category')
        ages_by_section = data_by_group.groupby(groups_col).sum().T
        self['PeopleTot'] = ages_by_section.sum(axis=1)
        # report all ages, zero-filled when a group has no data
        for col in self.OUTPUT_AGES:
            self[col] = ages_by_section.get(
                col, np.zeros_like(self.iloc[:, 0]))

        # extract long and lat and build geopy locations
        self[common_cfg.coord_col_names[0]] = self['geometry'].apply(
            lambda pos: pos.centroid.x)

        self[common_cfg.coord_col_names[1]] = self['geometry'].apply(
            lambda pos: pos.centroid.y)

        # `.values` replaces DataFrame.as_matrix(), removed in pandas 1.0.
        # Columns are reversed so each row reads (y, x); geopy.Point
        # unpacks a single sequence argument into its coordinates.
        yx_rows = self[common_cfg.coord_col_names[::-1]].values
        self[common_cfg.positions_col] = [geopy.Point(yx) for yx in yx_rows]

        if b_duplicates_check:
            # check no location is repeated - takes a while
            assert not any(self[common_cfg.positions_col].duplicated()),\
                'Repeated position found'

        # cache ages frame and mapped positions for quicker access
        age_multi_index = [
            self[common_cfg.id_quartiere_col_name],
            self[common_cfg.positions_col].apply(tuple)
        ]
        self.ages_frame = self[AgeGroup.all()].set_index(age_multi_index)

        self.mapped_positions = MappedPositionsFrame(
            long=self[common_cfg.coord_col_names[0]],
            lat=self[common_cfg.coord_col_names[1]],
            geopy_pos=self[common_cfg.positions_col].tolist(),
            id_quartiere=self[common_cfg.id_quartiere_col_name].tolist())

    def get_age_sample(self, age_group=None, n_sample=1000):
        """Draw coordinates with each row repeated by its people count.

        Args:
            age_group: column of ``ages_frame`` used as repeat count;
                None uses the row-wise sum over all age columns.
            n_sample: number of rows drawn (cast to int).

        Returns:
            Tuple of two arrays: the first and second coordinate column
            (in ``common_cfg.coord_col_names`` order) of the drawn rows.
        """
        if age_group is not None:
            coord, n_repeat = self.mapped_positions.align(
                self.ages_frame[age_group], axis=0)
        else:
            coord, n_repeat = self.mapped_positions.align(
                self.ages_frame.sum(axis=1), axis=0)
        # repeat every row position as many times as its count
        idx = np.repeat(range(coord.shape[0]), n_repeat)
        coord = coord[common_cfg.coord_col_names].iloc[idx]
        # `.values` replaces DataFrame.as_matrix(), removed in pandas 1.0
        sample = coord.sample(int(n_sample)).values
        return sample[:, 0], sample[:, 1]

    @classmethod
    def create_from_istat_cpa(cls, city_name):
        """Constructor caller for DemandFrame"""
        city_config = city_settings.get_city_config(city_name)
        # the duplicate-position check is skipped on this path
        return cls(city_config.istat_cpa_data, b_duplicates_check=False)
Example #15
0
 def agesFrame(self):
     """Return the AgeGroup columns indexed by quartiere and position.

     The index is built from the quartiere id column and the positions
     column converted to tuples.
     """
     quartiereIds = self[common_cfg.IdQuartiereColName]
     positionTuples = self[common_cfg.positionsCol].apply(tuple)
     agesOnly = self[AgeGroup.all()]
     return agesOnly.set_index([quartiereIds, positionTuples])