def getLine(geographicValue, target, states, year, month):
    """Build the line chart of inmate counts per admission month.

    Rows of the module-level ``df`` are restricted to ``target`` and to the
    half-open period ``[startPeriod, endPeriod)`` returned by
    ``utils.parse_dates(year, month)``. Unique ``interno`` values are then
    counted per ``Ingreso_Month`` (and per department in 'Department' mode).

    Args:
        geographicValue: ``'Department'`` or ``'National'``.
        target: Value matched against the ``target`` column of ``df``.
        states: Values of ``depto_establecimiento`` to keep; only used when
            ``geographicValue`` is ``'Department'``.
        year: Forwarded to ``utils.parse_dates``.
        month: Forwarded to ``utils.parse_dates``.

    Returns:
        The figure returned by ``line_Plot``, with the hover template of
        every trace set to ``'%{x}<br>%{y}'``.

    Raises:
        ValueError: If ``geographicValue`` is not one of the supported modes.
    """
    startPeriod, endPeriod = utils.parse_dates(year, month)

    selected = df[df['target'] == target]
    if geographicValue == 'Department':
        selected = selected[selected['depto_establecimiento'].isin(states)]
        group_keys = ['depto_establecimiento', 'Ingreso_Month']
    elif geographicValue == 'National':
        group_keys = ['Ingreso_Month']
    else:
        raise ValueError(
            "geographicValue must be 'Department' or 'National', got {!r}".format(
                geographicValue))

    # filter dataset by the daterange
    selected = selected[(selected['Ingreso_Month'] >= startPeriod)
                        & (selected['Ingreso_Month'] < endPeriod)]

    # Collapse to one row per inmate and group first, so that the second
    # count yields the number of *unique* inmates rather than raw rows.
    per_inmate = selected.groupby(['interno'] + group_keys).count().reset_index()
    grouped = per_inmate.groupby(group_keys).count().reset_index()

    if geographicValue == 'National':
        grouped['national'] = "Colombia"

    fig = line_Plot(grouped, geographicValue)
    for ser in fig['data']:
        ser['hovertemplate'] = '%{x}<br>%{y}'
    return fig
def getBarsentence(geographicValue, target, states, year, month):
    """Build the bar chart of inmates per sentence-length group.

    Rows of the module-level ``df`` are restricted to ``target``, to the
    selected departments (in 'Department' mode) and to the half-open period
    ``[startPeriod, endPeriod)`` returned by ``utils.parse_dates``. Each
    ``interno`` is kept once (first occurrence) and its ``sentencia`` is
    binned into twelve unit-wide groups labelled 1..12.

    Args:
        geographicValue: ``'Department'`` applies the ``states`` filter; any
            other value (normally ``'National'``) uses all departments.
        target: Value matched against the ``target`` column of ``df``.
        states: Values of ``depto_establecimiento`` to keep in 'Department'
            mode.
        year: Forwarded to ``utils.parse_dates``.
        month: Forwarded to ``utils.parse_dates``.

    Returns:
        The figure returned by ``barsentence_Plot``, with the hover template
        of its first trace set to ``'%{label}<br>%{value}'``.
    """
    startPeriod, endPeriod = utils.parse_dates(year, month)

    tmp = df[df['target'] == target]
    if geographicValue == 'Department':
        tmp = tmp[tmp['depto_establecimiento'].isin(states)]
    # filter dataset by the daterange
    tmp = tmp[(tmp['Ingreso_Month'] >= startPeriod)
              & (tmp['Ingreso_Month'] < endPeriod)]

    # One row per inmate. Assigning the result (instead of inplace=True on a
    # filtered slice of df) avoids pandas' SettingWithCopyWarning.
    inmates = (tmp[["interno", "sentencia"]]
               .drop_duplicates(subset="interno", keep='first')
               .reset_index(drop=True))

    # Bins [0, 1], (1, 2], ..., (11, 12]; values outside 0..12 become NaN and
    # are therefore left out of the counts below.
    bins = list(range(0, 13))
    labels = list(range(1, 13))
    inmates['sentence_group'] = pd.cut(
        inmates['sentencia'], bins, labels=labels, include_lowest=True)

    # observed=False keeps empty groups in the result regardless of the
    # pandas version's default for categorical groupers.
    grouped = (inmates.groupby(["sentence_group"], observed=False)["interno"]
               .count()
               .reset_index()
               .sort_values("sentence_group"))

    fig = barsentence_Plot(grouped, geographicValue)
    fig.data[0].hovertemplate = '%{label}<br>%{value}'
    return fig
def getBarage(geographicValue, target, states, year, month):
    """Build the bar chart of inmates per age range and ``genero``.

    Rows of the module-level ``df`` are restricted to ``target``, to the
    selected departments (in 'Department' mode) and to the half-open period
    ``[startPeriod, endPeriod)`` returned by ``utils.parse_dates``. Each
    ``interno`` is kept once (first occurrence) and its ``edad`` is binned
    into the ranges 18-25, 26-35, 36-45, 46-55, 56-65 and 65+.

    Args:
        geographicValue: ``'Department'`` applies the ``states`` filter; any
            other value (normally ``'National'``) uses all departments.
        target: Value matched against the ``target`` column of ``df``.
        states: Values of ``depto_establecimiento`` to keep in 'Department'
            mode.
        year: Forwarded to ``utils.parse_dates``.
        month: Forwarded to ``utils.parse_dates``.

    Returns:
        The figure returned by ``barage_Plot``, with the hover template of
        every trace set to ``'%{label}<br>%{value}'``.
    """
    startPeriod, endPeriod = utils.parse_dates(year, month)

    tmp = df[df['target'] == target]
    if geographicValue == 'Department':
        tmp = tmp[tmp['depto_establecimiento'].isin(states)]
    # filter dataset by the daterange
    tmp = tmp[(tmp['Ingreso_Month'] >= startPeriod)
              & (tmp['Ingreso_Month'] < endPeriod)]

    # One row per inmate. Assigning the result (instead of inplace=True on a
    # filtered slice of df) avoids pandas' SettingWithCopyWarning.
    inmates = (tmp[["interno", "edad", "genero"]]
               .drop_duplicates(subset="interno", keep='first')
               .reset_index(drop=True))

    # Ages outside 18..100 fall outside every bin, become NaN and are left
    # out of the counts below.
    bins = [18, 25, 35, 45, 55, 65, 100]
    labels = ['18-25', '26-35', '36-45', '46-55', '56-65', '65+']
    inmates['age_range'] = pd.cut(
        inmates['edad'], bins, labels=labels, include_lowest=True)

    # observed=False keeps empty age ranges in the result regardless of the
    # pandas version's default for categorical groupers.
    grouped = (inmates.groupby(["age_range", "genero"], observed=False)["interno"]
               .count()
               .reset_index()
               .sort_values("age_range"))

    fig = barage_Plot(grouped, geographicValue)
    # Iterate instead of indexing data[0] / data[1] so a selection that
    # yields fewer than two traces does not raise IndexError.
    for trace in fig.data:
        trace.hovertemplate = '%{label}<br>%{value}'
    return fig
def getBlock(geographicValue, target, states, year, month):
    """Build the block chart of the most frequent ``delito`` values.

    Rows of the module-level ``df`` are restricted to ``target``, to the
    selected departments (in 'Department' mode) and to the half-open period
    returned by ``utils.parse_dates``. Rows are then counted per ``delito``
    and at most the 50 largest counts are passed to ``block_Plot``.

    Args:
        geographicValue: ``'Department'`` applies the ``states`` filter; any
            other value uses all departments.
        target: Value matched against the ``target`` column of ``df``.
        states: Values of ``depto_establecimiento`` to keep in 'Department'
            mode.
        year: Forwarded to ``utils.parse_dates``.
        month: Forwarded to ``utils.parse_dates``.

    Returns:
        The figure returned by ``block_Plot``.
    """
    period_start, period_end = utils.parse_dates(year, month)

    subset = df[df['target'] == target]
    if geographicValue == 'Department':
        in_states = subset['depto_establecimiento'].isin(states)
        subset = subset[in_states]

    # Keep only rows whose admission month lies in [period_start, period_end).
    not_too_early = subset['Ingreso_Month'] >= period_start
    not_too_late = subset['Ingreso_Month'] < period_end
    subset = subset[not_too_early & not_too_late]

    counts = subset.groupby(["delito"]).size().reset_index(name="count")
    counts = counts.sort_values(by="count", ascending=False)

    # Cap the chart at the 50 most frequent entries.
    top_counts = counts.iloc[:50] if counts.shape[0] > 50 else counts
    return block_Plot(top_counts, geographicValue)
def get_map_data(geographicValue, target, year, month):
    """Build the map figure of unique inmate counts.

    Rows of the module-level ``df`` are restricted to ``target`` and to the
    half-open period ``[startPeriod, endPeriod)`` returned by
    ``utils.parse_dates``. Unique ``interno`` values are then counted per
    department ('Department' mode) or for the whole country ('National'
    mode).

    Args:
        geographicValue: ``'Department'`` or ``'National'``.
        target: Value matched against the ``target`` column of ``df``.
        year: Forwarded to ``utils.parse_dates``.
        month: Forwarded to ``utils.parse_dates``.

    Returns:
        The figure returned by ``map_departamentos`` ('Department') or
        ``map_national`` ('National').

    Raises:
        ValueError: If ``geographicValue`` is not one of the supported modes.
    """
    startPeriod, endPeriod = utils.parse_dates(year, month)

    tmp = df[df['target'] == target]
    # filter dataset by the daterange
    tmp = tmp[(tmp['Ingreso_Month'] >= startPeriod)
              & (tmp['Ingreso_Month'] < endPeriod)]

    if geographicValue == 'Department':
        # One row per (inmate, department), then count rows per department:
        # this yields the number of unique inmates per department.
        per_inmate = tmp.groupby(
            ['interno', 'depto_establecimiento']).count().reset_index()
        grouped = per_inmate.groupby(
            ['depto_establecimiento']).count().reset_index()
        grouped['normalized_count'] = (
            100 * grouped['interno'] / grouped['interno'].sum())

        # These departments are added with zero counts so they still reach
        # the map; skip any that already have data to avoid duplicate rows.
        zero_filled = ["VAUPES", "VICHADA", "GUAVIARE", "GUAINIA"]
        present = set(grouped['depto_establecimiento'])
        absent = [depto for depto in zero_filled if depto not in present]
        if absent:
            add_missing = pd.DataFrame(data={
                'depto_establecimiento': absent,
                'interno': [0] * len(absent),
                'normalized_count': [0] * len(absent),
            })
            grouped = pd.concat([grouped, add_missing], ignore_index=True)

        grouped_final = grouped.fillna(0)
        return map_departamentos(grouped_final)

    if geographicValue == 'National':
        # assign() returns a new frame, so no column is written into a
        # filtered slice of df (avoids SettingWithCopyWarning).
        tmp = tmp.assign(**{geographicValue: "Colombia"})
        # count unique convict ID
        per_inmate = tmp.groupby(
            ['interno', geographicValue]).count().reset_index()
        grouped = per_inmate.groupby([geographicValue]).count().reset_index()
        return map_national(grouped)

    raise ValueError(
        "geographicValue must be 'Department' or 'National', got {!r}".format(
            geographicValue))