예제 #1
0
def getLine(geographicValue, target, states, year, month):
    """Build the line figure of per-month counts for one target.

    Args:
        geographicValue: ``'Department'`` to keep only the departments in
            ``states`` and group by department and month, or ``'National'``
            to group by month only.
        target: Value matched against the ``target`` column of ``df``.
        states: Values of ``depto_establecimiento`` to keep; only used when
            ``geographicValue`` is ``'Department'``.
        year: Forwarded to ``utils.parse_dates``.
        month: Forwarded to ``utils.parse_dates``.

    Returns:
        The figure returned by ``line_Plot``, with the hover template of
        every trace set to ``'%{x}<br>%{y}'``.

    Raises:
        ValueError: If ``geographicValue`` is neither ``'Department'`` nor
            ``'National'``.
    """
    startPeriod, endPeriod = utils.parse_dates(year, month)

    # Rows of the requested target whose Ingreso_Month falls inside the
    # half-open range [startPeriod, endPeriod).
    in_scope = df[(df['target'] == target)
                  & (df['Ingreso_Month'] >= startPeriod)
                  & (df['Ingreso_Month'] < endPeriod)]

    if geographicValue == 'Department':
        in_scope = in_scope[in_scope['depto_establecimiento'].isin(states)]
        # First collapse to one row per (interno, department, month) so the
        # second count tallies distinct interno values per department/month.
        per_interno = in_scope.groupby(
            ['interno', 'depto_establecimiento',
             'Ingreso_Month']).count().reset_index()
        grouped = per_interno.groupby(
            ['depto_establecimiento', 'Ingreso_Month']).count().reset_index()
    elif geographicValue == 'National':
        # NOTE(review): unlike the Department branch this counts rows per
        # month without first collapsing by interno - confirm that is wanted.
        grouped = in_scope.groupby(['Ingreso_Month']).count().reset_index()
        grouped['national'] = "Colombia"
    else:
        # Fail with a clear message instead of an UnboundLocalError on
        # `grouped` further down.
        raise ValueError(
            "geographicValue must be 'Department' or 'National', got "
            f"{geographicValue!r}")

    fig = line_Plot(grouped, geographicValue)

    for ser in fig['data']:
        ser['hovertemplate'] = '%{x}<br>%{y}'

    return fig
예제 #2
0
def getBarsentence(geographicValue, target, states, year, month):
    """Build the bar figure of distinct ``interno`` counts per sentence bin.

    Args:
        geographicValue: ``'Department'`` to keep only the departments in
            ``states``, or ``'National'`` for no geographic restriction.
        target: Value matched against the ``target`` column of ``df``.
        states: Values of ``depto_establecimiento`` to keep; only used when
            ``geographicValue`` is ``'Department'``.
        year: Forwarded to ``utils.parse_dates``.
        month: Forwarded to ``utils.parse_dates``.

    Returns:
        The figure returned by ``barsentence_Plot``, with the hover template
        of its first trace set to ``'%{label}<br>%{value}'``.
    """
    startPeriod, endPeriod = utils.parse_dates(year, month)
    tmp = df[df['target'] == target]

    # Only the two known scopes are date-filtered and trimmed to the columns
    # needed below; any other value leaves the frame unfiltered.
    if geographicValue in ('Department', 'National'):
        columns = ["interno", "sentencia"]
        if geographicValue == 'Department':
            tmp = tmp[tmp['depto_establecimiento'].isin(states)]
            columns.append("depto_establecimiento")
        # Half-open date range [startPeriod, endPeriod).
        in_period = ((tmp['Ingreso_Month'] >= startPeriod)
                     & (tmp['Ingreso_Month'] < endPeriod))
        tmp = tmp.loc[in_period, columns]

    # Keep the first row per interno. Reassigning instead of using
    # inplace=True avoids mutating a slice of df, which raises
    # SettingWithCopyWarning.
    tmp = tmp.drop_duplicates(subset="interno",
                              keep='first').reset_index(drop=True)

    # Twelve unit-wide bins over [0, 12], labelled 1..12; include_lowest
    # makes the first bin closed on the left so a value of 0 is kept.
    bins = range(0, 13)
    labels = range(1, 13, 1)
    tmp['sentence_group'] = pd.cut(tmp.sentencia,
                                   bins,
                                   labels=labels,
                                   include_lowest=True)
    grouped = tmp.groupby([
        "sentence_group"
    ])["interno"].count().reset_index().sort_values("sentence_group")
    fig = barsentence_Plot(grouped, geographicValue)
    fig.data[0].hovertemplate = '%{label}<br>%{value}'
    return fig
예제 #3
0
def getBarage(geographicValue, target, states, year, month):
    """Build the bar figure of distinct ``interno`` counts per age range and gender.

    Args:
        geographicValue: ``'Department'`` to keep only the departments in
            ``states``, or ``'National'`` for no geographic restriction.
        target: Value matched against the ``target`` column of ``df``.
        states: Values of ``depto_establecimiento`` to keep; only used when
            ``geographicValue`` is ``'Department'``.
        year: Forwarded to ``utils.parse_dates``.
        month: Forwarded to ``utils.parse_dates``.

    Returns:
        The figure returned by ``barage_Plot``, with the hover template of
        every trace set to ``'%{label}<br>%{value}'``.
    """
    startPeriod, endPeriod = utils.parse_dates(year, month)
    tmp = df[df['target'] == target]

    # Only the two known scopes are date-filtered and trimmed to the columns
    # needed below; any other value leaves the frame unfiltered.
    if geographicValue in ('Department', 'National'):
        columns = ["interno", "edad", "genero"]
        if geographicValue == 'Department':
            tmp = tmp[tmp['depto_establecimiento'].isin(states)]
            columns.append("depto_establecimiento")
        # Half-open date range [startPeriod, endPeriod).
        in_period = ((tmp['Ingreso_Month'] >= startPeriod)
                     & (tmp['Ingreso_Month'] < endPeriod))
        tmp = tmp.loc[in_period, columns]

    # Keep the first row per interno. Reassigning instead of using
    # inplace=True avoids mutating a slice of df, which raises
    # SettingWithCopyWarning.
    tmp = tmp.drop_duplicates(subset="interno",
                              keep='first').reset_index(drop=True)

    # Age bins are right-closed; include_lowest keeps edad == 18 in the
    # first bin. Values outside [18, 100] get no age_range.
    bins = [18, 25, 35, 45, 55, 65, 100]
    labels = ['18-25', '26-35', '36-45', '46-55', '56-65', '65+']
    tmp['age_range'] = pd.cut(tmp.edad,
                              bins,
                              labels=labels,
                              include_lowest=True)
    grouped = tmp.groupby(
        ["age_range",
         "genero"])["interno"].count().reset_index().sort_values("age_range")
    fig = barage_Plot(grouped, geographicValue)

    # Style every trace rather than indexing [0] and [1]: a figure with a
    # single trace would otherwise raise IndexError.
    for trace in fig.data:
        trace.hovertemplate = '%{label}<br>%{value}'
    return fig
예제 #4
0
def getBlock(geographicValue, target, states, year, month):
    """Build the block figure of the most frequent ``delito`` values.

    Args:
        geographicValue: ``'Department'`` restricts the data to the
            departments in ``states``; any other value applies no
            geographic restriction.
        target: Value matched against the ``target`` column of ``df``.
        states: Values of ``depto_establecimiento`` to keep; only used when
            ``geographicValue`` is ``'Department'``.
        year: Forwarded to ``utils.parse_dates``.
        month: Forwarded to ``utils.parse_dates``.

    Returns:
        The figure returned by ``block_Plot`` for at most the 50 ``delito``
        values with the highest row counts.
    """
    startPeriod, endPeriod = utils.parse_dates(year, month)
    tmp = df[df['target'] == target]

    if geographicValue == 'Department':
        tmp = tmp[tmp['depto_establecimiento'].isin(states)]

    # Half-open date range [startPeriod, endPeriod).
    tmp = tmp[(tmp['Ingreso_Month'] >= startPeriod)
              & (tmp['Ingreso_Month'] < endPeriod)]

    # Rows per delito, most frequent first.
    grouped = tmp.groupby(["delito"]).size().reset_index(name="count")
    grouped = grouped.sort_values(by="count", ascending=False)

    # head() already copes with frames shorter than 50 rows, so no size
    # check is needed.
    grouped = grouped.head(50)

    return block_Plot(grouped, geographicValue)
예제 #5
0
파일: map.py 프로젝트: acocac/ds4a-app
def get_map_data(geographicValue, target, year, month):
    """Build the map figure of distinct ``interno`` counts for one target.

    Args:
        geographicValue: ``'Department'`` for counts per
            ``depto_establecimiento`` (rendered by ``map_departamentos``), or
            ``'National'`` for a single ``"Colombia"`` row (rendered by
            ``map_national``).
        target: Value matched against the ``target`` column of ``df``.
        year: Forwarded to ``utils.parse_dates``.
        month: Forwarded to ``utils.parse_dates``.

    Returns:
        The figure returned by ``map_departamentos`` or ``map_national``.

    Raises:
        ValueError: If ``geographicValue`` is neither ``'Department'`` nor
            ``'National'``.
    """
    startPeriod, endPeriod = utils.parse_dates(year, month)

    # Rows of the requested target whose Ingreso_Month falls inside the
    # half-open range [startPeriod, endPeriod).
    in_scope = df[(df['target'] == target)
                  & (df['Ingreso_Month'] >= startPeriod)
                  & (df['Ingreso_Month'] < endPeriod)]

    if geographicValue == 'Department':
        # Collapse to one row per (interno, department) so the second count
        # tallies distinct interno values per department.
        per_interno = in_scope.groupby(['interno', 'depto_establecimiento'
                                        ]).count().reset_index()
        grouped = per_interno.groupby(['depto_establecimiento'
                                       ]).count().reset_index()
        # Share of each department as a percentage of the total.
        grouped['normalized_count'] = (100 * grouped['interno'] /
                                       grouped['interno'].sum())

        # Zero-count rows for four departments, presumably because they never
        # occur in the data.
        # NOTE(review): they are appended unconditionally; if one of them ever
        # shows up in `grouped` it will appear twice - confirm.
        missing_depto = {
            'depto_establecimiento':
            ["VAUPES", "VICHADA", "GUAVIARE", "GUAINIA"],
            'interno': [0, 0, 0, 0],
            'normalized_count': [0, 0, 0, 0]
        }
        add_missing = pd.DataFrame(data=missing_depto)
        # The added rows lack the remaining count columns, which come out of
        # the concat as NaN; fill them with 0.
        grouped_final = pd.concat([grouped, add_missing],
                                  ignore_index=True).fillna(0)
        return map_departamentos(grouped_final)

    if geographicValue == 'National':
        # assign() returns a new frame, so the constant column is not written
        # onto a slice of df (which raises SettingWithCopyWarning).
        labelled = in_scope.assign(**{geographicValue: "Colombia"})
        # One row per interno first, so the second count tallies distinct
        # interno values.
        per_interno = labelled.groupby(['interno', geographicValue
                                        ]).count().reset_index()
        grouped = per_interno.groupby([geographicValue]).count().reset_index()
        return map_national(grouped)

    # Fail with a clear message instead of an UnboundLocalError on `figure`.
    raise ValueError(
        "geographicValue must be 'Department' or 'National', got "
        f"{geographicValue!r}")