def __init__(self, address, db):
    """
    Build the crime indicators for the district of an address and for the
    data selected around that address.

    Two groups of attributes are filled in, each with a total value and a
    month/year breakdown:

    * ``dist_*``: computed on the rows returned by
      ``_filter_db_by_district`` for ``address.district``.
    * ``circ_*``: computed on the rows returned by
      ``_get_data_crime_circle``, using ``_get_circle_boundaries`` as the
      polygon.

    :param address: Address instance; its ``district`` attribute selects
        the district
    :param db: crimes table whose ``columns`` must contain 'Year', 'Month',
        'Arrest', 'Latitude' and 'Longitude'
    :raises ValueError: if one of the mandatory columns is missing
    :raises TypeError: if address is not an Address instance
    """
    mandatory_columns = ('Year', 'Month', 'Arrest', 'Latitude', 'Longitude')
    # Report the first missing column, in the order listed above.
    first_missing = next(
        (name for name in mandatory_columns if name not in db.columns),
        None,
    )
    if first_missing is not None:
        raise ValueError(
            'Database does not contain mandatory "{}" column.'.format(first_missing)
        )

    if not isinstance(address, Address):
        raise TypeError('Receive address is not of type addressClass.Address')

    self.address = address
    district = address.district
    district_db = self._filter_db_by_district(district, db)
    self.district = district

    # District indicators over the whole period.
    self.dist_crime_density = iu.get_density(
        polygon=du.get_polygon(district),
        ammount=len(district_db),
    )
    self.dist_police_effectiveness = iu.effectiveness_police(data=district_db)
    self.dist_police_effectiveness_density = iu.effectiveness_sq_mile(
        polygon=du.get_polygon(district),
        data=district_db,
    )

    # District indicators broken down by month/year.
    monthly_indicator = self._generate_db_month_district_indicator
    self.dist_crime_density_month = monthly_indicator(district_db, 'density')
    self.dist_police_effectiveness_month = monthly_indicator(district_db, 'effect')
    self.dist_police_effectiveness_density_month = monthly_indicator(
        district_db, 'effectsq'
    )

    circle_data = self._get_data_crime_circle(db)

    # Circle indicators over the whole period.
    self.circ_crime_density = iu.get_density(
        polygon=self._get_circle_boundaries(),
        ammount=len(circle_data),
    )
    self.circ_police_effectiveness = iu.effectiveness_police(data=circle_data)
    self.circ_police_effectiveness_density = iu.effectiveness_sq_mile(
        polygon=self._get_circle_boundaries(),
        data=circle_data,
    )

    # Circle indicators broken down by month/year.
    monthly_indicator = self._generate_db_month_district_indicator
    self.circ_crime_density_month = monthly_indicator(circle_data, 'density')
    self.circ_police_effectiveness_month = monthly_indicator(circle_data, 'effect')
    self.circ_police_effectiveness_density_month = monthly_indicator(
        circle_data, 'effectsq'
    )
def effectiveness_sq_mile_by_district(self):
    """
    Compute the police effectiveness per square mile for every district in
    ``self.districts_contained``.

    A district whose polygon makes ``iu.effectiveness_sq_mile`` raise
    ``iu.ZeroAreaPolygon`` is reported as ``np.nan``.

    :return: dictionary mapping each district to its effectiveness value
    :raises MalformedCrimesDataFrame: if computing a district raises
        ValueError
    """
    per_district = {}
    for district_id in self.districts_contained:
        district_rows = self[self['District'] == district_id]
        try:
            value = iu.effectiveness_sq_mile(
                polygon=du.get_polygon(district_id),
                data=district_rows,
            )
        except ValueError as errmessage:
            raise MalformedCrimesDataFrame(errmessage)
        except iu.ZeroAreaPolygon:
            # No area to divide by: flag the district instead of failing.
            value = np.nan
        per_district[district_id] = value
    return per_district
def _generate_db_summarized_for_district(self, db):
    """
    Return a dataframe with the indicators of ``self.district`` per
    month/year.

    The data is filtered by year and then by month (1 to 12), and the three
    indicators are computed on each subset. Rows are indexed by a datetime
    set to the first day of the month, which makes plotting easier later.
    Months without any row get NaN for the three indicators.

    :param db: crimes data exposing the 'Year' and 'Month' columns
    :return: DataFrame with columns 'density', 'effectiveness' and
        'effect_by_sqm', one row per (year, month)
    """
    # ``pd.datetime`` was removed in pandas 2.0; use the standard library
    # class instead. Imported here so the method stays self-contained.
    from datetime import datetime

    data_ind = pd.DataFrame(columns=('density', 'effectiveness', 'effect_by_sqm'))
    for year in db.Year.unique():
        data_year = db[db['Year'] == year]
        for month in range(1, 13):
            data_month = data_year[data_year.Month == month]
            # int() also accepts years stored as floats (e.g. 2015.0).
            month_start = datetime(int(year), month, 1)
            if len(data_month) == 0:
                data_ind.loc[month_start] = [np.nan, np.nan, np.nan]
                continue
            # One polygon lookup serves both area-based indicators.
            polygon = du.get_polygon(district_number=self.district)
            data_ind.loc[month_start] = [
                iu.get_density(polygon=polygon, ammount=len(data_month)),
                iu.effectiveness_police(data=data_month),
                iu.effectiveness_sq_mile(polygon=polygon, data=data_month),
            ]
    return data_ind
def _generate_db_month_district(self, district=0):
    """
    Compute the three indicators by month and year, either for one district
    or for all the districts together.

    If ``district`` is zero the indicators are computed over every row,
    using the sum of the areas of ``self.districts_contained``; otherwise
    only the rows of the given district are used and the indicators come
    from the ``iu`` helpers.

    Rows are indexed by a datetime set to the first day of the month, which
    makes plotting easier later. Months without crime data get NaN for the
    three indicators.

    :param district: district to summarize, or 0 for the total
    :return: DataFrame with columns 'density', 'effectiveness' and
        'effect_by_sqm', one row per (year, month)
    """
    # ``pd.datetime`` was removed in pandas 2.0; use the standard library
    # class instead. Imported here so the method stays self-contained.
    from datetime import datetime

    years = self.Year.unique()
    data_ind = pd.DataFrame(columns=('density', 'effectiveness', 'effect_by_sqm'))

    whole_city = district == 0
    if whole_city:
        # Total area covered by the data; a separate loop name keeps the
        # ``district`` argument intact.
        area = 0
        for contained in self.districts_contained:
            area += proj.PolygonProjection(
                du.get_polygon(contained)
            ).calculate_area_in_miles()

    for year in years:
        if whole_city:
            data_year = self[self['Year'] == year]
        else:
            data_year = self[(self['District'] == district) & (self['Year'] == year)]

        for month in range(1, 13):
            data_month = data_year[data_year['Month'] == month]
            # int() also accepts years stored as floats (e.g. 2015.0).
            month_start = datetime(int(year), month, 1)
            crimes = len(data_month)

            if crimes == 0:
                data_ind.loc[month_start] = [np.nan, np.nan, np.nan]
            elif whole_city:
                # Element-wise comparison on the column, hence ``== True``.
                arrests = len(data_month[data_month['Arrest'] == True])  # noqa: E712
                arrest_ratio = arrests * 1.0 / crimes
                data_ind.loc[month_start] = [
                    crimes / area,
                    arrest_ratio,
                    arrest_ratio / area,
                ]
            else:
                # One polygon lookup serves both area-based indicators.
                polygon = du.get_polygon(district)
                data_ind.loc[month_start] = [
                    iu.get_density(polygon=polygon, ammount=crimes),
                    iu.effectiveness_police(data_month),
                    iu.effectiveness_sq_mile(polygon=polygon, data=data_month),
                ]
    return data_ind