def test_get_polygon_invalid_inputs(self):
    """
    Check that get_polygon rejects bad district identifiers.

    Every entry of self.invalid_districts must raise ValueError and every
    entry of self.malformed_districts must raise TypeError.
    """
    def expect_rejection(error_type, candidates):
        # One assertRaises per candidate, so each value is checked on its own
        for candidate in candidates:
            with self.assertRaises(error_type):
                databases_utils.get_polygon(candidate)

    expect_rejection(ValueError, self.invalid_districts)
    expect_rejection(TypeError, self.malformed_districts)
def _district_map_limits_calculator(district_number):
    """
    Compute the limits of a map that frames one district.

    The outline of the district is obtained from du.get_polygon; the first
    element of every outline point is read as the latitude and the second as
    the longitude. The map limits are the extremes of the outline widened by
    0.002 on every side.

    :param district_number: identifier of the district; must be contained in
        du.DISTRICTS
    :return min_lon, max_lon, min_lat, max_lat, district_limits_lon,
        district_limits_lat, vertices_list -- the four map limits, the list of
        longitudes and the list of latitudes of the district outline, and the
        four (lat, lon) corners of the map
    :raises TypeError: if district_number is not contained in du.DISTRICTS
    """
    # Only known Chicago districts are accepted
    if district_number not in du.DISTRICTS:
        raise TypeError("District number is not a valid District of Chicago")

    outline = du.get_polygon(district_number)

    # Separate the outline into its latitudes and its longitudes
    outline_lats = [point[0] for point in outline]
    outline_lons = [point[1] for point in outline]

    # The map is drawn slightly bigger than the district itself
    margin = 0.002
    lon_low = min(outline_lons) - margin
    lon_high = max(outline_lons) + margin
    lat_low = min(outline_lats) - margin
    lat_high = max(outline_lats) + margin

    # Corners of the map, as (lat, lon) pairs
    corners = [(lat_low, lon_low),
               (lat_high, lon_low),
               (lat_high, lon_high),
               (lat_low, lon_high)]

    return lon_low, lon_high, lat_low, lat_high, outline_lons, outline_lats, corners
def __init__(self, address, db):
    """
    Compute the crime indicators for the district of an address and for the
    circle around the address.

    :param address: Address instance; its ``district`` attribute selects the
        district to analyse
    :param db: crimes data; must expose the columns 'Year', 'Month',
        'Arrest', 'Latitude' and 'Longitude'
    :raises ValueError: if one of the mandatory columns is missing from db
    :raises TypeError: if address is not an Address instance
    """
    mandatory_columns = ('Year', 'Month', 'Arrest', 'Latitude', 'Longitude')
    for column in mandatory_columns:
        if column in db.columns:
            continue
        raise ValueError('Database does not contain mandatory "{}" column.'.format(column))

    if not isinstance(address, Address):
        raise TypeError('Receive address is not of type addressClass.Address')

    self.address = address
    district = address.district
    district_rows = self._filter_db_by_district(district, db)
    self.district = district

    # Indicators over the whole district
    self.dist_crime_density = iu.get_density(
        polygon=du.get_polygon(district), ammount=len(district_rows))
    self.dist_police_effectiveness = iu.effectiveness_police(data=district_rows)
    self.dist_police_effectiveness_density = iu.effectiveness_sq_mile(
        polygon=du.get_polygon(district), data=district_rows)

    # Indicators of the district split by month/year
    for attribute, indicator in (('dist_crime_density_month', 'density'),
                                 ('dist_police_effectiveness_month', 'effect'),
                                 ('dist_police_effectiveness_density_month', 'effectsq')):
        setattr(self, attribute,
                self._generate_db_month_district_indicator(district_rows, indicator))

    circle_rows = self._get_data_crime_circle(db)

    # Indicators over the whole circle
    self.circ_crime_density = iu.get_density(
        polygon=self._get_circle_boundaries(), ammount=len(circle_rows))
    self.circ_police_effectiveness = iu.effectiveness_police(data=circle_rows)
    self.circ_police_effectiveness_density = iu.effectiveness_sq_mile(
        polygon=self._get_circle_boundaries(), data=circle_rows)

    # Indicators of the circle split by month/year
    for attribute, indicator in (('circ_crime_density_month', 'density'),
                                 ('circ_police_effectiveness_month', 'effect'),
                                 ('circ_police_effectiveness_density_month', 'effectsq')):
        setattr(self, attribute,
                self._generate_db_month_district_indicator(circle_rows, indicator))
def crime_density_by_district(self):
    """
    Compute the crime density (as given by iu.get_density) of every district
    listed in self.districts_contained.

    A district whose polygon makes iu.get_density raise iu.ZeroAreaPolygon
    gets the value NaN.

    :return density_by_district -- dictionary whose keys are the district
        numbers and whose values are the densities
    """
    density_by_district = {}
    for district in self.districts_contained:
        district_rows = self[self['District'] == district]
        try:
            density = iu.get_density(polygon=du.get_polygon(district),
                                     ammount=len(district_rows))
        except iu.ZeroAreaPolygon:
            # No area to divide by: the indicator is undefined here
            density = np.nan
        density_by_district[district] = density
    return density_by_district
def effectiveness_sq_mile_by_district(self):
    """
    Compute the police effectiveness per square mile (as given by
    iu.effectiveness_sq_mile) of every district listed in
    self.districts_contained.

    A district whose polygon raises iu.ZeroAreaPolygon gets the value NaN.

    :return effectiveness_sq_mile -- dictionary whose keys are the district
        numbers and whose values are the indicators
    :raises MalformedCrimesDataFrame: if the computation of any district
        raises ValueError
    """
    indicator_by_district = {}
    for district in self.districts_contained:
        district_rows = self[self['District'] == district]
        try:
            indicator = iu.effectiveness_sq_mile(polygon=du.get_polygon(district),
                                                 data=district_rows)
        except ValueError as errmessage:
            # Reported with the exception type of this class
            raise MalformedCrimesDataFrame(errmessage)
        except iu.ZeroAreaPolygon:
            indicator = np.nan
        indicator_by_district[district] = indicator
    return indicator_by_district
def _generate_db_month_district(self, district=0):
    """
    Build the table of indicators by month and year.

    For every year found in the 'Year' column and every month from 1 to 12
    one row is produced, labelled with the first day of that month as a
    datetime (which makes the table easy to plot later). Months without
    crime data get NaN in the three columns.

    If district is zero the indicators cover all the data, using as area the
    sum of the areas of the districts in self.districts_contained. Otherwise
    only the rows of the given district are used and the indicators come
    from iu.get_density, iu.effectiveness_police and
    iu.effectiveness_sq_mile.

    :param district: district number, or 0 for the total
    :return data_ind -- DataFrame with the columns 'density',
        'effectiveness' and 'effect_by_sqm'
    """
    # The standard library class is used directly: the pd.datetime alias is
    # deprecated since pandas 1.0 and no longer exists in pandas 2.0
    from datetime import datetime

    def month_label(year, month):
        # First day of the month, used as index of the row
        return datetime.strptime(str(int(month)).zfill(2) + str(year), '%m%Y')

    years = self.Year.unique()
    data_ind = pd.DataFrame(columns=('density', 'effectiveness', 'effect_by_sqm'))

    if district == 0:
        # The total considers the area of all the districts in the data
        area = 0
        for contained_district in self.districts_contained:
            area += proj.PolygonProjection(
                du.get_polygon(contained_district)).calculate_area_in_miles()

        def rows_of_year(year):
            return self[self['Year'] == year]

        def indicators(month_rows):
            crimes = len(month_rows)
            # '== True' is kept on purpose: the column is not guaranteed to
            # be of boolean dtype
            arrest_ratio = len(month_rows[month_rows['Arrest'] == True]) * 1.0 / crimes
            return [crimes * 1.0 / area, arrest_ratio, arrest_ratio / area]
    else:
        def rows_of_year(year):
            return self[(self['District'] == district) & (self['Year'] == year)]

        def indicators(month_rows):
            return [iu.get_density(polygon=du.get_polygon(district),
                                   ammount=len(month_rows)),
                    iu.effectiveness_police(month_rows),
                    iu.effectiveness_sq_mile(polygon=du.get_polygon(district),
                                             data=month_rows)]

    for year in years:
        year_rows = rows_of_year(year)
        for month in range(1, 13):
            month_rows = year_rows[year_rows['Month'] == month]
            if len(month_rows) == 0:
                values = [np.nan, np.nan, np.nan]
            else:
                values = indicators(month_rows)
            data_ind.loc[month_label(year, month)] = values
    return data_ind
def _get_data_crime_circle(self, fullDataCrime):
    """
    Receives a non-filtered DF and returns the rows of the points inside the
    circle around self.address, filtering them by distance.

    To save time, only the districts that contain at least one point of the
    circle boundaries are searched.

    :param fullDataCrime: crimes data with the columns 'District',
        'Latitude' and 'Longitude'
    :return: the rows of fullDataCrime whose distance to the address, as
        given by gu.calculate_distance_between_points, is less or equal
        than 1 (presumably miles -- confirm against geo utils)
    :raises ValueError: if fullDataCrime contains no districts
    """
    # Getting boundaries
    boundaries = self._get_circle_boundaries()
    districts = fullDataCrime['District'].unique()
    if len(districts) == 0:
        raise ValueError("There are no districts in data")
    district_polygons = {dist: du.get_polygon(dist) for dist in districts}

    # Filtering districts where the circle has values, to optimize time
    districts_to_search = []
    for bound in boundaries:
        for dist, polygon in district_polygons.items():
            if dist in districts_to_search:
                continue
            if gu.return_points_in_polygon([bound], polygon):
                districts_to_search.append(dist)
    dataframe = fullDataCrime[fullDataCrime['District'].isin(districts_to_search)]

    # Getting the points inside of the circle by distance. The coordinate
    # columns are walked once and the rows are selected by position:
    # DataFrame.ix does not exist since pandas 1.0, and positions stay
    # correct even when the index has repeated labels.
    coordinates = zip(dataframe['Latitude'], dataframe['Longitude'])
    positions_in_circle = []
    for position, (lat, lon) in enumerate(coordinates):
        distance = gu.calculate_distance_between_points(
            self.address.lat, self.address.lon, lat, lon)
        if distance <= 1:
            positions_in_circle.append(position)
    return dataframe.iloc[positions_in_circle]
def test_get_polygon_valid_inputs(self):
    """
    Call get_polygon with every entry of self.valid_districts.

    There is nothing to assert: the test passes as long as no call raises.
    """
    for valid_district in self.valid_districts:
        databases_utils.get_polygon(valid_district)
def setUp(self):
    """
    Prepare the fixtures of the tests: the polygon of district 11 and the
    crimes sample stored in './tests/csv_for_test.csv' (the path is relative,
    so it is resolved against the current working directory).
    """
    district_polygon = du.get_polygon(11)
    self.boundaries = district_polygon

    crimes_sample = pd.read_csv('./tests/csv_for_test.csv')
    self.data = crimes_sample
def _generate_db_summarized_for_district(self, db):
    """
    Returns a dataframe of the indicators per month/year.

    The data is filtered by year and then by month, and the indicators are
    computed on each subset with the polygon of self.district. One row is
    produced for every year found in db and every month from 1 to 12,
    labelled with the first day of that month as a datetime (which makes the
    table easy to plot later). Months without data get NaN in the three
    columns.

    :param db: crimes data with the columns 'Year' and 'Month'
    :return data_ind -- DataFrame with the columns 'density',
        'effectiveness' and 'effect_by_sqm'
    """
    # The standard library class is used directly: the pd.datetime alias is
    # deprecated since pandas 1.0 and no longer exists in pandas 2.0
    from datetime import datetime

    years = db.Year.unique()
    data_ind = pd.DataFrame(columns=('density', 'effectiveness', 'effect_by_sqm'))
    for year in years:
        data_year = db[db['Year'] == year]
        for month in range(1, 13):
            month_label = datetime.strptime(str(int(month)).zfill(2) + str(year), '%m%Y')
            data_month = data_year[data_year.Month == month]
            if len(data_month) == 0:
                data_ind.loc[month_label] = [np.nan, np.nan, np.nan]
            else:
                data_ind.loc[month_label] = [
                    iu.get_density(polygon=du.get_polygon(district_number=self.district),
                                   ammount=len(data_month)),
                    iu.effectiveness_police(data=data_month),
                    iu.effectiveness_sq_mile(polygon=du.get_polygon(self.district),
                                             data=data_month)]
    return data_ind