Exemple #1
0
def empty_plot(plot_dim, title, scale, scale_text=1):
    '''Return a placeholder figure that displays a grey "No Data" message.

    plot_dim is read as (height, width) and title is handed to the figure.
    The message is drawn at 20pt multiplied by scale and scale_text. Axis
    tick labels are coloured white and the toolbar is removed.
    '''
    height, width = plot_dim[0], plot_dim[1]
    placeholder = figure(plot_height=height,
                         plot_width=width,
                         x_range=(1, 10),
                         y_range=(1, 10),
                         title=title)
    placeholder = USAID_style(placeholder)

    # a single text glyph at a fixed position inside the fixed 1..10 ranges
    message_source = ColumnDataSource({'x': [4], 'y': [5], 'text': ['No Data']})
    message_size = str(20 * scale * scale_text) + 'pt'
    message = Text(x="x",
                   y="y",
                   text="text",
                   text_color='#999999',
                   text_font='Gill Sans MT',
                   text_font_size=message_size)
    placeholder.add_glyph(message_source, message)

    # white tick labels, no toolbar or logo
    placeholder.axis.major_label_text_color = 'white'
    placeholder.toolbar.logo = None
    placeholder.toolbar_location = None

    return placeholder
Exemple #2
0
def gen_area_stack(data,
                   areas_sel,
                   focus_sel,
                   year_sel=(2000, 2017),
                   areas_var='series_id',
                   focus_var='country_id',
                   value_var='value_start',
                   year_var='year',
                   font="Gill Sans MT",
                   prop=False,
                   colors='default',
                   title_text='Area Chart',
                   fill_alpha=.5,
                   area_labels=None,
                   plot_dim=(350, 550),
                   scale=1,
                   no_tools=False,
                   print_details=False):
    '''Generate a stacked area chart from long-form data.

    Rows are kept where focus_var equals focus_sel[0] (only the first entry of
    focus_sel is used), areas_var is in areas_sel, and year_var lies within
    year_sel (inclusive). One area is drawn per entry of areas_sel, stacked in
    that order.

    Parameters:
        prop: when truthy, each year is rescaled so the areas sum to 100 and
            the y axis is fixed to (0, 100).
        colors: 'default' or a list of palette names (keys of the palette
            defined below), one per area.
        area_labels: legend / tooltip names; None or ['default'] uses
            str() of each entry in areas_sel.
        plot_dim: (height, width), multiplied by scale.
        no_tools: when truthy the toolbar and logo are removed.
        print_details: when truthy, diagnostic values are printed.

    Returns the figure. If building the chart raises (for example because the
    selection holds too little data), the result of empty_plot() is returned
    instead.
    '''

    ########################
    ### Set color scheme
    palette = {
        'USAID Blue': '#002F6C',
        'Medium Blue': '#0067B9',
        'Light Blue': '#A7C6ED',
        'Light Gray': '#CFCDC9',
        'USAID Red': '#BA0C2F',
        'Dark Red': '#651D32',
        'Medium Gray': '#8C8985',
        'Rich Black': '#212721',
        'Dark Gray': '#6C6463'
    }

    if colors == 'default':
        colors = list(palette.keys())

    ###############################
    ####   select observations (countries) and variables

    data = data[data[focus_var] == focus_sel[0]]
    data = data[data[areas_var].isin(areas_sel)]
    # .copy(): the writes below must land on our own frame, not on a slice
    # of the caller's frame
    data = data[data[year_var].between(year_sel[0], year_sel[1])].copy()

    # diagnostics reported when print_details is set
    detail_1 = len(data)  # number of selected rows
    detail_2 = None       # max stacked value, known once the plot data exists
    detail_3 = data       # the selected rows

    # rename series ids to their position in areas_sel. A single mapping
    # replaces every id at once, so an id that happens to equal another
    # id's position cannot be renamed twice.
    position = {area: i for i, area in enumerate(areas_sel)}
    data[areas_var] = data[areas_var].map(position)
    data.sort_values([areas_var, year_var], inplace=True)

    # scale once, up front, so the fallback plot is not scaled a second time
    plot_dim = (plot_dim[0] * scale, plot_dim[1] * scale)

    # try, fall back to the empty plot if the data is insufficient
    try:
        ################################
        ###   Change data into panel
        ################################

        # select only the data we need
        data = data[[focus_var, areas_var, year_var, value_var]]
        # shift data into panel format: one column per area position
        data = data.pivot_table(values=value_var,
                                index=[focus_var, year_var],
                                columns=areas_var,
                                aggfunc='sum').reset_index()

        # drop focus var (no longer needed)
        data = data.drop(focus_var, axis=1)
        # sort on year_var, drop years with a missing value in any area
        data = data.sort_values(year_var).dropna()

        # select the min and max year for the plot
        years = (data[year_var].min(), data[year_var].max())

        # set index to year
        data = data.set_index(year_var)

        #################################
        ##### Proportion or normal stack
        #################################

        area_cols = list(range(len(areas_sel)))
        if prop:
            # replace values with their share of the yearly total
            total = data[area_cols].sum(axis=1)
            for col in area_cols:
                data[col] = data[col] / total * 100

        # change all variable names to an iterable (yy0, yy1, etc);
        # raises if an area in areas_sel has no data at all
        data.columns = ['yy' + str(i) for i in area_cols]

        ####################################
        ##### Generate Area Plot

        # generate the numeric basics of the plot for input into patches
        areas = stacked(data)

        # generate a max value for the y axis position
        max_value = int(np.round(np.nanmax(areas.values), 0))
        detail_2 = max_value

        p = figure(x_range=years,
                   y_range=(0, 100) if prop else (0, max_value),
                   plot_height=plot_dim[0],
                   plot_width=plot_dim[1])

        ### generate the stack coordinates
        x2 = np.hstack((data.index[::-1], data.index))

        # plot the patches; use exactly one colour per area so every
        # column handed to the glyph has the same length
        n_areas = areas.shape[1]
        p.patches([x2] * n_areas, [areas[c].values for c in areas],
                  color=[palette[c] for c in colors[:n_areas]],
                  fill_alpha=fill_alpha,
                  line_width=3 * scale)

        # generate the hover line; the index column carries the name of
        # year_var, so reference it by that name
        source = ColumnDataSource(data)
        p.line(x=year_var,
               y='yy0',
               source=source,
               color=palette['USAID Blue'],
               line_width=.2)

        #######################################
        ### Generate the Legend and Hover
        #######################################

        if area_labels is None or area_labels == ['default']:
            names = [str(i) for i in areas_sel]
        else:
            names = area_labels
        labels = []

        # draw each area again as its own patch so that the legend item
        # has a renderer to refer to
        for i, area in enumerate(areas):
            r = p.patch(x2,
                        areas[area],
                        color=palette[colors[i]],
                        alpha=0.8,
                        line_color=None)
            labels.append(LegendItem(label=dict(value=names[i]),
                                     renderers=[r]))

        # place the legend above the plot
        legend = Legend(items=labels,
                        location=(0, 10),
                        orientation='horizontal')
        p.add_layout(legend, 'above')

        ########### Hover
        tooltips1 = [(names[i], '@' + 'yy' + str(i) + '{0.00 a}')
                     for i in range(len(areas_sel))]

        hover = HoverTool(
            tooltips=tooltips1,
            # display a tooltip whenever the cursor is vertically in line with a glyph
            mode='vline')

        p.add_tools(hover)

        p = USAID_style(p, font=font)

        p.title.text = title_text
        p.legend.background_fill_alpha = 0
        p.legend.border_line_color = None
        p.xgrid.visible = True
        p.legend.glyph_height = 30
        p.legend.glyph_width = 30

        ######## drop tools if prompted
        if no_tools:
            p.toolbar.logo = None
            p.toolbar_location = None

        if print_details:
            print('The length of the dataset')
            print(detail_1)
            print('The max value of the dataset')
            print(detail_2)
            print('The dataset')
            print(detail_3)

    except Exception:
        # any failure while shaping or drawing is treated as "no data"
        p = empty_plot(plot_dim, title=title_text, scale=scale)

        # print details if asked.
        if print_details:
            print('The length of the dataset')
            print(detail_1)
            print('Dataset to plot:')
            print(detail_3)

    return p
Exemple #3
0
def gen_pie(data,
            cat_sel=None,
            focus_sel=None,
            cat_labels=None,
            cat_var='series_id',
            focus_var='country_id',
            value_var='value_start',
            agg_option='sum',
            title_text='Pie Chart',
            fill_alpha=0.5,
            font='Gill Sans MT',
            print_details=False,
            plot_dim=(500, 500),
            scale=1,
            no_tools=False,
            legend_location='top_right',
            line_color='white',
            line_width=3,
            legend_orientation='vertical'):
    '''Generate a pie chart from long-form data.

    Rows are kept where cat_var is in cat_sel and focus_var is in focus_sel.
    The first len(cat_labels) entries of cat_sel are renamed to the matching
    cat_labels. The selection, cat_var, value_var and agg_option are handed to
    gen_counter(), and each resulting category becomes one slice whose angle
    is its share of the total.

    Parameters:
        cat_sel, focus_sel, cat_labels: lists; None is treated as empty.
        plot_dim: (height, width), multiplied by scale; line_width is
            multiplied by scale as well.
        legend_location: a legend location inside the plot, or 'outside' to
            move the legend to the right of the plot.
        no_tools: when truthy the toolbar and logo are removed.
        print_details: when truthy the plotted dataset is printed.

    Returns the figure. If the selection yields no categories, or drawing
    raises, the result of empty_plot() is returned instead.

    NOTE(review): the fill_alpha argument is not applied; wedges are drawn
    with a fixed fill_alpha of 0.9.
    '''
    cat_sel = [] if cat_sel is None else cat_sel
    focus_sel = [] if focus_sel is None else focus_sel
    cat_labels = [] if cat_labels is None else cat_labels

    ##################################
    ##### generate the underlying data
    ##################################

    # select the categories and observations of interest; .copy() so the
    # relabelling below writes to our own frame
    data = data[data[cat_var].isin(cat_sel)
                & data[focus_var].isin(focus_sel)].copy()

    # replace each selected category id with its label in one pass, so a
    # label that equals another id cannot be replaced a second time
    label_for = {cat_sel[i]: label for i, label in enumerate(cat_labels)}
    data[cat_var] = data[cat_var].map(lambda v: label_for.get(v, v))

    # generate the counter dictionary
    counter_dict = gen_counter(data, cat_var, value_var, agg_option)

    # use the counter function to generate x for the plot
    x = Counter(counter_dict)

    #### scale plot (once, so the fallback plot is not scaled again)
    plot_dim = (plot_dim[0] * scale, plot_dim[1] * scale)
    line_width = line_width * scale

    # nothing to draw: hand back the placeholder instead of failing on the
    # missing 'value' column below
    if not x:
        if print_details:
            print('The dataset:')
            print(data)
        return empty_plot(plot_dim, title=title_text, scale=scale)

    # place 'x' into a dataframe called data (replace the old data for efficiency)
    data = pd.DataFrame.from_dict(
        dict(x), orient='index').reset_index().rename(index=str,
                                                      columns={
                                                          0: 'value',
                                                          'index': 'category'
                                                      })

    # generate the angle of each slice
    data['angle'] = data['value'] / sum(x.values()) * 2 * pi

    # add colors based on USAID color scheme
    palette = {
        'USAID Blue': '#002F6C',
        'USAID Red': '#BA0C2F',
        'Rich Black': '#212721',
        'Medium Blue': '#0067B9',
        'Light Blue': '#A7C6ED',
        'Dark Red': '#651D32',
        'Dark Gray': '#6C6463',
        'Medium Gray': '#8C8985',
        'Light Gray': '#CFCDC9'
    }
    # one colour per slice actually present (a selected category may be
    # missing from the data); the palette repeats if there are more slices
    # than colours
    swatches = list(palette.values())
    data['color'] = [swatches[i % len(swatches)] for i in range(len(data))]

    ### detail_1 = dataset
    detail_1 = data

    try:

        ####################################
        ###### Generate the plot
        ####################################

        # generate figure with simple tooltip (need to add style later)
        p = figure(plot_height=plot_dim[0],
                   plot_width=plot_dim[1],
                   title=title_text,
                   tools="save,hover",
                   tooltips="@category: @value{0.0}")

        # generate the wedges for the pie chart.
        p.wedge(x=0,
                y=1,
                radius=0.4,
                start_angle=cumsum('angle', include_zero=True),
                end_angle=cumsum('angle'),
                line_color=line_color,
                line_width=line_width,
                fill_color='color',
                fill_alpha=0.9,
                legend='category',
                source=data)

        # basic formatting of the chart.
        p.axis.axis_label = None
        p.axis.visible = False
        p.grid.grid_line_color = None

        p = USAID_style(p, font=font)

        p.legend.orientation = legend_orientation

        if legend_location == 'outside':
            # detach the legend from the plot area and re-attach it on the right
            p.legend.location = 'center'
            new_legend = p.legend[0]
            p.legend[0].plot = None
            p.add_layout(new_legend, 'right')

        else:
            p.legend.location = legend_location

        ######## drop tools if prompted
        if no_tools:
            p.toolbar.logo = None
            p.toolbar_location = None

    except Exception:
        # any failure while drawing is treated as "no data"
        p = empty_plot(plot_dim, title=title_text, scale=scale)

    # print details if asked.
    if print_details:
        print('The dataset:')
        print(detail_1)

    return p
Exemple #4
0
def gen_multi_bar(data,
                  obs_sel,
                  cat_sel,
                  cat_labels,
                  obs_labels,
                  obs_var='country_id',
                  cat_var='series_id',
                  value_var='value_start',
                  colors='default',
                  x_axis_label='',
                  y_axis_label='',
                  title_text='Multiple bar',
                  plot_orientation='vertical',
                  fill_alpha=0.8,
                  line_width=2,
                  plot_dim=(300, 500),
                  year_sel=(2016, 2017),
                  year_var='year',
                  scale=1,
                  legend_orientation='vertical',
                  legend_location='center',
                  bar_width=0.2,
                  major_label_orientation='horizontal',
                  print_details=False,
                  font='Gill Sans MT'):
    '''Generate a multiple (side-by-side) bar chart from long-form data.

    Rows are kept where obs_var is in obs_sel, cat_var is in cat_sel and
    year_var lies within year_sel (inclusive). Each observation becomes a
    group on the categorical axis and each category one bar within the group.
    Observation/category combinations with no row are plotted as zero.

    Parameters:
        obs_labels, cat_labels: display names matching obs_sel / cat_sel by
            position.
        colors: 'default' or a list of colours, one per category.
        x_axis_label, y_axis_label: axis titles; empty strings leave the
            axes untouched.
        plot_orientation: 'vertical' for vertical bars, anything else for
            horizontal bars.
        plot_dim: (height, width), multiplied by scale; line_width is
            multiplied by scale as well.
        legend_location: a legend location inside the plot, or 'outside' to
            move the legend above the plot.
        major_label_orientation: anything other than 'horizontal' rotates
            the x axis labels by pi / 2.
        print_details: when truthy the dictionary of plotted values is
            printed.

    Returns the figure. If building the chart raises ValueError (for example
    when the per-category value lists differ in length), the result of
    empty_plot() is returned instead.
    '''

    # duplicate keys from the original literal are removed; the order of
    # first appearance, which decides the default colours, is kept
    palette = {
        'USAID Blue': '#002F6C',
        'Medium Blue': '#0067B9',
        'Light Blue': '#A7C6ED',
        'Medium Gray': '#8C8985',
        'Light Gray': '#CFCDC9',
        'USAID Red': '#BA0C2F',
        'Rich Black': '#212721',
        'Dark Red': '#651D32',
        'Dark Gray': '#6C6463'
    }

    # select only the observations of interest; .copy() so the relabelling
    # below writes to our own frame
    data = data[(data[obs_var].isin(obs_sel)) & (data[cat_var].isin(cat_sel)) &
                (data[year_var].between(year_sel[0], year_sel[1]))].copy()

    #####################
    ### deal with missings
    ######################

    # replace all obs_sel with obs_labels in one pass, so a label that
    # equals another selected id cannot be replaced a second time
    label_for = {sel: obs_labels[i] for i, sel in enumerate(obs_sel)}
    data[obs_var] = data[obs_var].map(lambda v: label_for.get(v, v))

    # generate every (observation, category) combination
    combos = [(obs, cat) for obs in obs_labels for cat in cat_sel]
    df = pd.DataFrame(combos, columns=[obs_var, cat_var])

    # merge in the data so that missing combinations appear as rows
    data = pd.merge(df, data, on=[obs_var, cat_var], how='left')

    # replace missing with zero
    data = data.fillna(0)

    #####################
    ### prep data for visual
    ######################

    # generate the lists for labels
    observations = list(data[obs_var].unique())
    categories = [str(i) for i in cat_sel]

    if colors == 'default':
        colors = list(palette.values())[0:len(categories)]

    # generate the dictionary of data points
    d = {'obs': observations}

    # place the values of each category under its label; compare against
    # the selected id itself so non-integer ids work too
    for i, cat in enumerate(cat_sel):
        d[cat_labels[i]] = list(data[data[cat_var] == cat][value_var].values)

    # detail 1 is the dictionary
    detail_1 = d

    #### scale plot (once, so the fallback plot is not scaled again)
    plot_dim = (plot_dim[0] * scale, plot_dim[1] * scale)
    line_width = line_width * scale

    # try to generate the plot:
    try:

        # make into dataframe, add to column data source
        df_final = pd.DataFrame(d)
        source = ColumnDataSource(df_final)

        # generate max value (plus 5% headroom) so we can start the plot at zero
        values_df = df_final.drop('obs', axis=1)
        max_value = np.nanmax(
            values_df.values) + .05 * np.nanmax(values_df.values)
        max_value = int(np.round(max_value, 0))

        #############################
        # generate the plot
        ##############################

        # legend titles because they cannot be perfectly the same as the label names, we add a space to the end
        legend_names = [i + ' ' for i in cat_labels]

        # generate the spacing between the bars of one group
        spacing = spacing_alg(categories)

        if plot_orientation == 'vertical':

            # generate figure
            p = figure(x_range=observations,
                       y_range=(0, max_value),
                       plot_height=plot_dim[0],
                       plot_width=plot_dim[1],
                       toolbar_location=None,
                       title=title_text)

            # place bars
            for i in range(0, len(categories)):
                p.vbar(x=dodge('obs', spacing[i], range=p.x_range),
                       top=cat_labels[i],
                       width=bar_width,
                       source=source,
                       fill_alpha=fill_alpha,
                       line_width=line_width,
                       color=colors[i],
                       legend=value(legend_names[i]))
        else:
            # generate figure
            p = figure(y_range=observations,
                       x_range=(0, max_value),
                       plot_height=plot_dim[0],
                       plot_width=plot_dim[1],
                       toolbar_location=None,
                       title=title_text)

            # place bars
            for i in range(0, len(categories)):
                p.hbar(y=dodge('obs', spacing[i], range=p.y_range),
                       right=cat_labels[i],
                       height=bar_width,
                       left=0,
                       source=source,
                       fill_alpha=fill_alpha,
                       line_width=line_width,
                       color=colors[i],
                       legend=value(legend_names[i]))

        #######################
        ###### hover tool
        #######################

        hover = HoverTool(tooltips=[(i, '@' + i + '{0.0}')
                                    for i in cat_labels],
                          mode='mouse',
                          point_policy='follow_mouse')
        p.add_tools(hover)

        p = USAID_style(p, font=font)

        ######################
        #### style choices
        ######################

        p.yaxis.visible = True
        p.xaxis.visible = True
        if x_axis_label:
            p.xaxis.axis_label = x_axis_label
        if y_axis_label:
            p.yaxis.axis_label = y_axis_label
        if major_label_orientation != 'horizontal':
            p.xaxis.major_label_orientation = pi / 2
        ########################
        ### legend options
        ########################

        p.legend.orientation = legend_orientation

        if legend_location == 'outside':
            # set legend location outside the plot (change the 'above' to change to the sides)
            p.legend.location = 'center'
            new_legend = p.legend[0]
            p.legend[0].plot = None
            p.add_layout(new_legend, 'above')
            p.legend.border_line_color = None

        else:
            # set the legend location within the plot in this location
            p.legend.location = legend_location

    except ValueError:
        p = empty_plot(plot_dim, title=title_text, scale=scale)

    # print details if prompted
    if print_details:
        print('Dictionary of values')
        print(detail_1)

    return p
Exemple #5
0
def gen_stacked_bar(data,
                    cat_sel,
                    stacked_sel,
                    stacked_labels,
                    cat_labels,
                    cat_var='country_id',
                    stacked_var='series_id',
                    value_var='value_start',
                    colors='default',
                    x_axis_label='',
                    y_axis_label='',
                    title_text='stacked bar',
                    orientation='x-axis',
                    fill_alpha=0.8,
                    line_width=2,
                    plot_dim=(300, 500),
                    year_sel=(2016, 2017),
                    year_var='year',
                    scale=1,
                    legend_location='center',
                    bar_gap=0.4,
                    prop=False,
                    no_tools=False,
                    major_label_orientation='horizontal',
                    print_details=False,
                    font='Gill Sans MT'):
    '''Generate a stacked bar chart from long-form data.

    Rows are kept where cat_var is in cat_sel, stacked_var is in stacked_sel
    and year_var lies within year_sel (inclusive). Each category becomes one
    bar and each entry of stacked_sel one segment of the stack.

    Parameters:
        cat_labels, stacked_labels: display names matching cat_sel /
            stacked_sel by position.
        colors: 'default' or a list of colours, one per stack segment.
        x_axis_label, y_axis_label: axis titles; empty strings leave the
            axes untouched.
        orientation: only 'x-axis' draws bars; any other value prints a
            notice and leaves the figure without bars.
        prop: when truthy, each bar is rescaled so its segments sum to 100
            and the y axis is fixed to (0, 100).
        plot_dim: (height, width), multiplied by scale; line_width is
            multiplied by scale as well.
        legend_location: a legend location inside the plot, or 'outside' to
            move the legend to the right of the plot.
        major_label_orientation: anything other than 'horizontal' rotates
            the x axis labels by pi / 2.5.
        no_tools: when truthy the toolbar and logo are removed.
        print_details: when truthy, diagnostic values are printed.

    Returns the figure. If building the chart raises (for example because the
    selection holds too little data), the result of empty_plot() is returned
    instead.
    '''

    # duplicate keys from the original literal are removed; the order of
    # first appearance, which decides the default colours, is kept
    palette = {
        'USAID Blue': '#002F6C',
        'Medium Blue': '#0067B9',
        'Light Blue': '#A7C6ED',
        'Medium Gray': '#8C8985',
        'Light Gray': '#CFCDC9',
        'USAID Red': '#BA0C2F',
        'Rich Black': '#212721',
        'Dark Red': '#651D32',
        'Dark Gray': '#6C6463'
    }

    # select only the observations of interest; .copy() so the relabelling
    # below writes to our own frame
    data = data[(data[cat_var].isin(cat_sel))]
    data = data[data[stacked_var].isin(stacked_sel)]
    data = data[data[year_var].between(year_sel[0], year_sel[1])].copy()

    # replace category ids with their labels in one pass, so a label that
    # equals another selected id cannot be replaced a second time
    label_for = {sel: cat_labels[i] for i, sel in enumerate(cat_sel)}
    data[cat_var] = data[cat_var].map(lambda v: label_for.get(v, v))

    # generate the lists for labels
    bars = list(data[cat_var].unique())

    if colors == 'default':
        colors = list(palette.values())[0:len(stacked_sel)]

    # generate the dictionary of data points; compare against the selected
    # id itself so non-integer ids work too
    d = {'bars': bars}
    for i, sel in enumerate(stacked_sel):
        d[stacked_labels[i]] = list(
            data[data[stacked_var] == sel][value_var].values)

    values_dict = {label: d[label] for label in stacked_labels}

    #### scale plot (once, so the fallback plot is not scaled again)
    plot_dim = (plot_dim[0] * scale, plot_dim[1] * scale)
    line_width = line_width * scale

    # diagnostics; they stay None if the step that produces them fails
    detail_1 = None  # number of bars in the value table
    detail_2 = None  # max value used for the axis
    detail_3 = None  # the value table itself

    # try, fall back to the empty plot if the data is insufficient
    try:

        # one row per bar, one column per stack segment, plus the bar total
        values_df = pd.DataFrame(values_dict)
        values_df['total'] = values_df[stacked_labels].sum(axis=1)

        detail_1 = len(values_df)
        detail_3 = values_df

        #################################
        ##### Proportion or normal stack
        #################################

        if prop:

            # replace variable values with the proportions
            for label in stacked_labels:
                values_df[label] = values_df[label] / values_df['total'] * 100
            # drop total when finished
            values_df.drop('total', inplace=True, axis=1)

            # return to dictionary as d: each label keeps its own column
            d = {label: values_df[label].tolist() for label in stacked_labels}

            # place bars back into the dataset.
            d['bars'] = bars

        # generate a max value (plus 5% headroom) for the y axis; without
        # prop the 'total' column is still present, so this is the tallest
        # stack
        max_value = np.nanmax(
            values_df.values) + .05 * np.nanmax(values_df.values)
        max_value = int(np.round(max_value, 0))

        ## detail 2: max value
        detail_2 = max_value

        #############################
        # generate the plot
        ##############################

        # legend titles because they cannot be perfectly the same as the label names, we add a space to the end
        legend_names = [i + ' ' for i in stacked_labels]

        ###### Generate the figure
        p = figure(x_range=bars,
                   y_range=(0, 100) if prop else (0, max_value),
                   plot_height=plot_dim[0],
                   plot_width=plot_dim[1])

        # generate stack
        if orientation == 'x-axis':
            p.vbar_stack(stacked_labels,
                         x='bars',
                         width=bar_gap,
                         line_width=line_width,
                         color=colors,
                         source=d,
                         fill_alpha=fill_alpha,
                         legend=legend_names)
        else:
            print('y-axis plot in construction.')

        #######################
        ###### hover tool
        #######################

        hover = HoverTool(tooltips=[(i, '@' + i + '{0.0}')
                                    for i in stacked_labels],
                          mode='mouse',
                          point_policy='follow_mouse')
        p.add_tools(hover)

        ######################
        #### style choices
        ######################

        p = USAID_style(p, font=font)

        p.yaxis.visible = True
        p.xaxis.visible = True
        if x_axis_label:
            p.xaxis.axis_label = x_axis_label
        if y_axis_label:
            p.yaxis.axis_label = y_axis_label

        if major_label_orientation != 'horizontal':
            p.xaxis.major_label_orientation = pi / 2.5

        if legend_location == 'outside':
            # detach the legend from the plot area and re-attach it on the right
            p.legend.location = 'center'
            new_legend = p.legend[0]
            p.legend[0].plot = None
            p.add_layout(new_legend, 'right')

        else:
            p.legend.location = legend_location

        ######## drop tools if prompted
        if no_tools:
            p.toolbar.logo = None
            p.toolbar_location = None

    except Exception:
        # any failure while shaping or drawing is treated as "no data"
        p = empty_plot(plot_dim, title=title_text, scale=scale)

    # print details if asked.
    if print_details:
        print('The length of the dataset')
        print(detail_1)
        print('The max value of the dataset')
        print(detail_2)
        print('The dataset')
        print(detail_3)

    return p
Exemple #6
0
def gen_grouped_bar(data,
                    cat_var_member,
                    cat_var_group,
                    value_var='value_start',
                    cat_var_member_name='series_name',
                    cat_var_group_name='country_id',
                    title_text='Grouped Bar - Example',
                    line_width=3,
                    fill_alpha=.5,
                    plot_dim=(300, 800),
                    scale=1,
                    no_tools=False,
                    major_label_orientation='horizontal',
                    font='Gill Sans MT'):
    '''Generate a grouped bar chart with members nested under groups.

    Rows are kept where cat_var_member_name is in cat_var_member and
    cat_var_group_name is in cat_var_group. The data is grouped by
    (group, member); each bar's height is read from the '<value_var>_mean'
    column of the grouped source, and bars are coloured by group.

    Parameters:
        cat_var_member, cat_var_group: values to keep in the member and
            group columns.
        cat_var_member_name, cat_var_group_name: names of those columns;
            swap them to change which variable is nested under which.
        plot_dim: (height, width), multiplied by scale; line_width is
            multiplied by scale as well.
        major_label_orientation: 'horizontal' keeps the labels at 1.2
            radians; anything else sets them to pi / 2.5.
        no_tools: when truthy the toolbar and logo are removed.

    Returns the figure.
    '''

    ########################
    #### prep data
    ########################

    # select the data of interest; .copy() so the casts below write to our
    # own frame and not to a slice of the caller's frame
    data = data[data[cat_var_member_name].isin(cat_var_member)
                & data[cat_var_group_name].isin(cat_var_group)].copy()

    # make sure cat_vars are string types (object)
    data[cat_var_member_name] = data[cat_var_member_name].astype(str)
    data[cat_var_group_name] = data[cat_var_group_name].astype(str)

    # group by both columns; the keys must be a list, because a tuple is
    # interpreted by pandas as one single key
    group = data.groupby([cat_var_group_name, cat_var_member_name])

    ###### generate palette
    palette = {
        'USAID Blue': '#002F6C',
        'USAID Red': '#BA0C2F',
        'Rich Black': '#212721',
        'Medium Blue': '#0067B9',
        'Light Blue': '#A7C6ED',
        'Dark Red': '#651D32',
        'Dark Gray': '#6C6463',
        'Medium Gray': '#8C8985',
        'Light Gray': '#CFCDC9'
    }
    # one colour per requested group, in palette order
    palette = list(palette.values())[0:len(cat_var_group)]

    # column holding the (group, member) factors in the grouped source
    name = cat_var_group_name + '_' + cat_var_member_name
    # colour by the first level of the factor (the group)
    index_cmap = factor_cmap(name,
                             palette=palette,
                             factors=sorted(data[cat_var_group_name].unique()),
                             end=1)

    name_tip = '@' + name
    value_tip = '@' + value_var + '_mean{0.0}'
    ##########################
    ##### Generate plot
    ##########################

    #### scale plot
    plot_dim = (plot_dim[0] * scale, plot_dim[1] * scale)
    line_width = line_width * scale

    # generate figure
    p = figure(plot_width=plot_dim[1],
               plot_height=plot_dim[0],
               title=title_text,
               x_range=group,
               toolbar_location=None,
               tooltips=[('Category: ', name_tip), ('Value: ', value_tip)])

    # generate bar graph
    p.vbar(x=name,
           top=value_var + '_mean',
           width=.8,
           source=group,
           color=index_cmap,
           line_width=line_width,
           fill_alpha=fill_alpha)

    ##########################
    #### style the plot
    ##########################

    p.y_range.start = 0
    p.x_range.range_padding = 0.05
    p.xgrid.grid_line_color = None
    p.xaxis.major_label_orientation = 1.2
    p.outline_line_color = None

    p = USAID_style(p, font=font)

    ## title
    p.title.text_font_size = '15pt'
    p.title.text = title_text

    if major_label_orientation != 'horizontal':
        p.xaxis.major_label_orientation = pi / 2.5
    ######## drop tools if prompted
    if no_tools:
        p.toolbar.logo = None
        p.toolbar_location = None

    return p
Exemple #7
0
def gen_map_choropleth_values(data,
                              color_var='value_start',
                              color_high='#550000',
                              color_low='#FFAAAA',
                              num_color_cats=20,
                              map_type='OSM',
                              plot_dim=(350, 700),
                              background_color='white',
                              color_mapper=True,
                              title_text='',
                              font='Gill Sans MT',
                              country_outline_color='white',
                              fill_alpha=.7,
                              line_width=1,
                              hover=True,
                              color_range='default',
                              label_color='black'):
    """Build a choropleth map whose patches are coloured by ``color_var``.

    Parameters
    ----------
    data : table accepted by ``ColumnDataSource``
        Must provide the columns ``x_coord`` and ``y_coord`` (patch outlines)
        and ``color_var``. The hover tooltip additionally reads
        ``country_name`` and ``value_start``.
    color_var : str
        Column that drives the fill colour.
    color_high, color_low : str
        End points of the colour gradient.
    num_color_cats : int
        Number of colours generated between ``color_low`` and ``color_high``.
    map_type : str
        ``'OSM'`` draws on a mercator figure with a CartoDB Positron tile
        layer; any other value draws on a plain figure with fixed
        longitude/latitude ranges.
    plot_dim : tuple
        ``(height, width)`` of the figure.
    background_color : str
        Background fill of the figure and of the colour bar.
    color_mapper : bool
        When truthy, a vertical colour bar is added to the right.
    title_text, font : str
        Figure title and the font handed to ``USAID_style`` / the colour bar.
    country_outline_color, fill_alpha, line_width
        Styling of the (unselected, un-hovered) patches.
    hover : bool
        When truthy, a hover tool with an HTML tooltip is attached.
    color_range : 'default' or pair
        ``'default'`` spans the observed minimum/maximum of ``color_var``;
        otherwise ``(low, high)`` is used as given.
    label_color : str
        Colour of the colour-bar tick labels.

    Returns
    -------
    The styled Bokeh figure.
    """
    #################
    ### color palette
    #################

    # gradient of hex codes running from the low colour to the high colour
    gradient = Color(color_low).range_to(Color(color_high), num_color_cats)
    colors = [shade.hex_l for shade in gradient]

    if color_range == 'default':
        # Series.min()/max() skip missing values; the builtin min()/max()
        # over a list containing NaN give an order-dependent (often NaN)
        # answer, which breaks the colour scale.
        values = data[color_var]
        low_bound, high_bound = values.min(), values.max()
    else:
        low_bound, high_bound = color_range[0], color_range[1]

    mapper = LinearColorMapper(palette=colors, low=low_bound, high=high_bound)

    def mapped_color():
        # fresh field/transform spec for every property that follows the data
        return {'field': color_var, 'transform': mapper}

    ##################
    ### generate source data
    ##################

    source = ColumnDataSource(data)

    #####################
    #### generate plot
    ######################

    if map_type == 'OSM':
        # web-mercator extent (metres) with a tile layer underneath
        p = figure(title=title_text,
                   plot_width=plot_dim[1],
                   plot_height=plot_dim[0],
                   x_range=(-13000000, 20500000),
                   y_range=(-6000000, 7000000),
                   x_axis_location=None,
                   y_axis_location=None,
                   background_fill_color=background_color,
                   x_axis_type="mercator",
                   y_axis_type="mercator",
                   tools='save,tap,reset')
        p.add_tile(CARTODBPOSITRON)
    else:
        # plain figure with fixed coordinate ranges
        p = figure(title=title_text,
                   plot_width=plot_dim[1],
                   plot_height=plot_dim[0],
                   x_axis_location=None,
                   y_axis_location=None,
                   background_fill_color=background_color,
                   y_range=(-55, 78),
                   x_range=(-125, 185),
                   tools='save,tap,reset')
        # hide the logo through the toolbar, as the other plots in this
        # file do, instead of the ``logo`` figure keyword
        p.toolbar.logo = None

    #### Plot data
    p.grid.grid_line_color = None

    r = p.patches(
        'x_coord',
        'y_coord',
        source=source,
        line_alpha=.5,
        fill_color=mapped_color(),
        fill_alpha=fill_alpha,
        line_color=country_outline_color,
        line_width=line_width,

        # hovered patches: outline takes the fill colour, fully opaque
        hover_line_color=mapped_color(),
        hover_line_alpha=1,
        hover_fill_color=mapped_color(),

        # selected patches
        selection_fill_color=mapped_color(),
        selection_line_color=mapped_color(),
        selection_fill_alpha=1,

        # non-selected patches
        nonselection_fill_alpha=0.5,
        nonselection_fill_color=mapped_color(),
        nonselection_line_color="white",
        nonselection_line_alpha=.5)

    ######################
    ### add hover and mapper
    ######################

    # NOTE(review): the tooltip always shows ``value_start`` even when
    # ``color_var`` names another column - confirm that this is intended.
    TOOLTIP = """
    <div style="padding: 0px;"> 

        <div>
            <span style="font-size: 12px; font-family: 'Gill Sans MT', sans-serif; color: black;"><b>@country_name: </b>@value_start{0.0}</span>
        </div>
    </div>
    """

    if hover:
        hover_tool = HoverTool(renderers=[r],
                               tooltips=TOOLTIP,
                               point_policy='follow_mouse')
        p.add_tools(hover_tool)

    if color_mapper:
        color_bar = ColorBar(color_mapper=mapper,
                             major_label_text_font_size="10pt",
                             ticker=BasicTicker(desired_num_ticks=5),
                             orientation='vertical',
                             label_standoff=6,
                             border_line_color=None,
                             location=(0, 0),
                             width=10,
                             major_label_text_font=font,
                             background_fill_color=background_color,
                             major_label_text_color=label_color)
        p.add_layout(color_bar, 'right')

    ##########
    ### style settings
    ##########

    p = USAID_style(p, font=font)

    p.grid.grid_line_alpha = .1

    return p
Exemple #8
0
def gen_map_country(country_sel,
                    color,
                    color_var='value_start',
                    color_high='#550000',
                    color_low='#FFAAAA',
                    num_color_cats=20,
                    map_type='coords',
                    OSM=False,
                    plot_dim=(350, 700),
                    background_color='white',
                    color_mapper=True,
                    font='Gill Sans MT',
                    country_outline_color='white',
                    fill_alpha=.7,
                    line_width=1,
                    hover=True,
                    color_range='default',
                    file='C:/Users/alightner/Documents/Shared/'):
    """Draw the outline of a single country from the detailed map file.

    The outlines are read from
    ``<file>pyADVISE/Data/country_<map_type>_detailed.csv``; that file stores
    the ``x_coord`` / ``y_coord`` lists as strings, so they are parsed back
    into Python lists before plotting.

    Parameters
    ----------
    country_sel
        Value matched against the ``country_id`` column of the map file.
    map_type : str
        Part of the file name; ``'OSM'`` additionally selects a fixed-size
        (990 x 500) mercator figure with only the save tool.
    plot_dim : tuple
        ``(height, width)`` of the figure when ``map_type`` is not ``'OSM'``.
    background_color, country_outline_color, fill_alpha, line_width
        Styling of the figure and of the country patches.
    font : str
        Font handed to ``USAID_style``.
    hover : bool
        When truthy, a hover tool showing ``country_name`` is attached.
    file : str
        Directory prefix of the map file; it is concatenated as-is, so it
        must end with a path separator.
    color, color_var, color_high, color_low, num_color_cats, OSM,
    color_mapper, color_range
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    The styled Bokeh figure.
    """
    # read the file which was cleaned and exported in the 'Gen_Map.ipynb'
    df_map = pd.read_csv(file + 'pyADVISE/Data/country_' + map_type +
                         '_detailed.csv')

    # keep only the requested country; copy so the column assignment below
    # works on an independent frame rather than on a slice
    df_map = df_map[df_map['country_id'] == country_sel].copy()

    # The coordinate lists were stored as strings. Parse each column in one
    # pass; this replaces the per-cell DataFrame.set_value loop, which relied
    # on an API that newer pandas releases no longer provide.
    for coord_col in ('x_coord', 'y_coord'):
        df_map[coord_col] = df_map[coord_col].apply(ast.literal_eval)

    ##################
    ### generate source data
    ##################

    source = ColumnDataSource(df_map)

    #####################
    #### generate plot
    ######################

    if map_type == 'OSM':
        p = figure(title="Practice Map",
                   plot_width=990,
                   plot_height=500,
                   x_axis_location=None,
                   y_axis_location=None,
                   background_fill_color=background_color,
                   x_axis_type="mercator",
                   y_axis_type="mercator",
                   tools="save")
    else:
        p = figure(title="Practice Map",
                   plot_width=plot_dim[1],
                   plot_height=plot_dim[0],
                   x_axis_location=None,
                   y_axis_location=None,
                   background_fill_color=background_color,
                   tools='save,tap,reset')

    # hide the logo through the toolbar, as the other plots in this file do,
    # instead of the ``logo`` figure keyword
    p.toolbar.logo = None

    # generate patches
    r = p.patches('x_coord',
                  'y_coord',
                  source=source,
                  fill_alpha=fill_alpha,
                  line_color=country_outline_color,
                  line_width=line_width)

    ######################
    ### add hover
    ######################

    TOOLTIP = """
    <div style="padding: 10px;"> 

        <div>
            <span style="font-size: 12px; font: 'Gill Sans MT'; color: #002F6C;"><b>@country_name: </b></span>
        </div>
    </div>
    """

    if hover:
        hover_tool = HoverTool(renderers=[r], tooltips=TOOLTIP)
        p.add_tools(hover_tool)

    ##########
    ### style settings
    ##########

    p = USAID_style(p, font=font)
    p.grid.grid_line_alpha = 0

    return p
Exemple #9
0
def gen_radar(df,
              obs_var,
              cat_var,
              value_var='value_start',
              title_text='',
              fill_alpha=.2,
              scale=1,
              line_alpha=.6,
              line_width=5,
              legend_location='top_right',
              no_tools=False,
              print_details=False,
              axis_marks='default',
              grid_color='#8C8985',
              patch_colors='default',
              font='Gill Sans MT',
              legend_orientation='vertical',
              plot_dim=(500, 500),
              label_font_size=14,
              num_font_size=10,
              custom_tooltip=None):
    """Draw a radar chart with one polygon per observation.

    Every distinct value of ``cat_var`` becomes a spoke and every distinct
    value of ``obs_var`` becomes a polygon whose vertices are the
    observation's ``value_var`` values, scaled against a reference tick.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data with the columns ``obs_var``, ``cat_var`` and
        ``value_var``.
    obs_var, cat_var, value_var : str
        Column names for observation, category (spoke) and value.
    title_text : str
        Plot title.
    fill_alpha, line_alpha, line_width
        Styling of the polygons; ``line_width`` is multiplied by ``scale``
        and also sets the vertex marker size (twice the line width).
    scale : number
        Multiplier applied to the plot dimensions, line widths and fonts.
    legend_location, legend_orientation
        Passed to the plot legend.
    no_tools : bool
        Accepted for interface compatibility; not used by this function.
    print_details : bool
        When truthy and the plot could not be built, print the raw values
        per observation.
    axis_marks : 'default' or sequence of numbers
        ``'default'`` derives tick marks from matplotlib's ``AutoLocator``
        and scales against the second-to-last tick; otherwise the given
        marks are used and the last one is the reference.
    grid_color : str
        Colour of the background circles, spokes and labels.
    patch_colors : 'default' or sequence of colours
        ``'default'`` uses six colours of the module-level ``palette``, which
        limits the chart to six observations.
    font : str
        Font for labels and ``USAID_style``.
    plot_dim : tuple
        ``(height, width)`` before scaling.
    label_font_size, num_font_size : number
        Point sizes (before scaling) of the spoke labels and tick numbers.
    custom_tooltip : list or None
        Optional replacement tooltips: element 0 for the polygons, element 1
        for the vertex markers.

    Returns
    -------
    The styled Bokeh figure, or the "No Data" figure from ``empty_plot`` when
    a ``ValueError`` is raised while building the chart.
    """
    ###########################################
    ### data input
    ###########################################

    # observation names and category labels, as strings
    obs = list(df[obs_var].astype('str').unique())
    text = list(df[cat_var].astype('str').unique())

    max_var = df[value_var].max()
    min_var = df[value_var].min()

    # isinstance guard: comparing a list/array of marks to a str is not a
    # reliable way to detect the default
    default_axis = isinstance(axis_marks, str) and axis_marks == 'default'

    if default_axis:
        locator = matplotlib.ticker.AutoLocator()
        locator.create_dummy_axis()
        axis_values = locator.tick_values(min_var, max_var)

        # The plot lives in a unit square, so ticks are expressed relative
        # to the second-to-last tick (the reference circle).
        axis_scaled = np.array(axis_values) / axis_values[-2]

        if len(axis_values) > 6:
            # Too many ticks: keep every other one, walking backwards from
            # the reference tick. Floor division keeps all picked indices in
            # range; rounding len/2 over-counted for lengths such as 7 or 11
            # and raised IndexError.
            keep = len(axis_values) // 2
            picks = [-2 - 2 * k for k in reversed(range(keep))]
            # the trailing 1 is a placeholder so the reference tick stays at
            # position [-2], exactly as in the un-thinned case
            axis_values = [axis_values[j] for j in picks] + [1]
            axis_scaled = [axis_scaled[j] for j in picks] + [1]

        ref_idx = -2
    else:
        axis_values = np.array(axis_marks)
        # explicit marks: the last one is the reference
        axis_scaled = np.array(axis_values) / axis_values[-1]
        ref_idx = -1

    # raw and scaled values per observation
    reals = {}
    scales = {}
    for key in df[obs_var].unique():
        observed = df[df[obs_var] == key][value_var].values
        reals[str(key)] = observed
        # halve so that the reference tick sits at radius 0.5
        scales[str(key)] = observed / axis_values[ref_idx] / 2

    # kept for the diagnostic print in the fallback branch
    detail_1 = reals

    # Scale once, up front. Doing this inside the try block meant the
    # fallback branch scaled the dimensions a second time.
    scaled_dim = (plot_dim[0] * scale, plot_dim[1] * scale)
    line_width = line_width * scale

    # try to build plot, send empty plot if fail.
    try:

        flist = [scales[name] for name in obs]

        ## the number of categories defines the shape
        num_vars = len(text)

        theta = np.linspace(0, 2 * np.pi, num_vars, endpoint=False)
        # rotate theta such that the first axis is at the top
        theta += np.pi / 2

        def unit_poly_verts(theta, r):
            """Return the (x, y) points at radius r around (0.5, 0.5)."""
            x0, y0 = 0.5, 0.5
            return [(r * np.cos(t) + x0, r * np.sin(t) + y0) for t in theta]

        def radar_patch(r, theta):
            """Convert radii (one per angle) into x/y arrays around (0.5, 0.5)."""
            yt = r * np.sin(theta) + 0.5
            xt = r * np.cos(theta) + 0.5
            return xt, yt

        ######### Generate plot

        p = figure(title="",
                   plot_width=scaled_dim[1],
                   plot_height=scaled_dim[0],
                   x_range=(-.23, 1.55),
                   y_range=(-.1, 1.2),
                   tools='save,tap,reset')

        #################################
        #### Generate labels
        #################################

        # label anchor points sit slightly outside the outer circle
        verts = unit_poly_verts(theta, 0.55)
        x = [v[0] for v in verts]
        y = np.array([v[1] for v in verts]) - .03

        # The first `split` labels are right-aligned (left side of the plot),
        # the rest left-aligned. With more than three categories one extra
        # label goes to the left group.
        num_cat = len(text)
        split = int(np.round(num_cat / 2))
        if num_cat > 3:
            split += 1
        left_labels = text[:split]
        right_labels = text[split:]

        source_left = ColumnDataSource({
            'x': x[:split],
            'y': y[:split],
            'text': left_labels
        })
        # NOTE(review): the extra 0.5 makes 'x' one element longer than 'y'
        # and 'text'; kept as in the original - confirm whether it is needed.
        source_right = ColumnDataSource({
            'x': x[split:] + [0.5],
            'y': y[split:],
            'text': right_labels
        })

        fsize = str(label_font_size * scale) + 'pt'

        label_left = LabelSet(x="x",
                              y="y",
                              text="text",
                              source=source_left,
                              text_font=font,
                              text_font_size=fsize,
                              text_color=grid_color,
                              text_align='right')
        label_right = LabelSet(x="x",
                               y="y",
                               text="text",
                               source=source_right,
                               text_font=font,
                               text_font_size=fsize,
                               text_color=grid_color)
        p.add_layout(label_left)
        p.add_layout(label_right)

        #################################
        # generate background
        #################################

        # one circle per tick
        for diameter in axis_scaled:
            ring = Ellipse(x=0.5,
                           y=0.5,
                           width=diameter,
                           height=diameter,
                           fill_color=None,
                           line_width=scale,
                           line_color=grid_color,
                           line_alpha=0.5)
            p.add_glyph(ring)

        # spokes from the centre to the reference circle
        for x_end, y_end in unit_poly_verts(theta, 0.50):
            p.line(x=(0.5, x_end),
                   y=(0.5, y_end),
                   line_width=3 * scale,
                   line_color=grid_color,
                   line_alpha=0.5)

        #### tick numbers
        # in default mode the last tick is not labelled
        nums = axis_values[:-1] if default_axis else axis_values
        # Numbers run along the horizontal radius to the left of the centre.
        # All three columns are cut to the number of labels; previously 'y'
        # was hard-coded to five entries and 'x' could be one longer than
        # 'text'.
        x_nums = [.5 - half for half in np.array(axis_scaled) / 2][:len(nums)]
        y_nums = [0.5] * len(nums)

        source = ColumnDataSource({'x': x_nums, 'y': y_nums, 'text': nums})

        fsize = str(num_font_size * scale) + 'pt'
        numbers = LabelSet(x="x",
                           y="y",
                           text="text",
                           source=source,
                           text_font=font,
                           text_font_size=fsize,
                           text_color=grid_color)

        p.add_layout(numbers)

        ##################################
        ####### Plot Patches and circles
        ###################################

        # the default list also caps the number of observations at six
        if isinstance(patch_colors, str) and patch_colors == 'default':
            colors = [
                palette[shade] for shade in [
                    'USAID Blue', 'USAID Red', 'Medium Blue', 'Light Blue',
                    'Dark Red', 'Medium Gray'
                ]
            ]
        else:
            colors = patch_colors

        ##### Patches: one row per observation holding its outline
        outlines = []
        patch_rows = []
        for i, name in enumerate(obs):
            # raises ValueError when an observation does not have exactly
            # one value per category -> handled by the fallback below
            xt, yt = radar_patch(flist[i], theta)
            outlines.append((xt, yt))
            patch_rows.append(
                pd.DataFrame({
                    'xt': [xt],
                    'yt': [yt],
                    'obs': name,
                    'colors': colors[i]
                }))
        sources1 = pd.concat(patch_rows) if patch_rows else pd.DataFrame()

        TOOLTIPS = """
            <div> 

                <div>
                    <span style="font-size: 10px; font: 'Open Sans'; color: black;"><b>@obs</b></span>
                </div>
            </div>
        """
        if custom_tooltip is not None:
            try:
                TOOLTIPS = custom_tooltip[0]
            except (IndexError, KeyError, TypeError):
                print('Must pass custom tooltip in a list.')

        r = p.patches(xs='xt',
                      ys='yt',
                      fill_alpha=fill_alpha,
                      line_alpha=line_alpha,
                      color='colors',
                      line_width=line_width,
                      legend='obs',
                      source=ColumnDataSource(sources1),
                      hover_line_color='colors',
                      hover_line_alpha=1,
                      hover_color='colors',
                      hover_fill_alpha=.35)
        hover_p = HoverTool(renderers=[r], tooltips=TOOLTIPS)
        p.add_tools(hover_p)

        #### vertex markers (eventually change to be one source file)

        TOOLTIPS = """
            <div> 
                <div>
                    <span style="font-size: 15px; font: 'Open Sans'; color: black; "><b>@Obs</b></span>

                </div>
                <div>
                    <span style="font-size: 15px; font: 'Open Sans';color: black;"><b>@Category:</b> @Value{0.0}</span>
                </div>
            </div>
        """
        if custom_tooltip is not None:
            try:
                TOOLTIPS = custom_tooltip[1]
            except (IndexError, KeyError, TypeError):
                print(
                    'There are two tooltips, to change them both, pass two separate tooltips in a list.'
                )

        for i, name in enumerate(obs):
            xt, yt = outlines[i]
            marker_data = {
                'Category': text,
                'Obs': [name] * len(text),
                'Value': reals[name],
                'yt': yt,
                'xt': xt
            }
            s = p.circle(x='xt',
                         y='yt',
                         color=colors[i],
                         source=marker_data,
                         size=line_width * 2,
                         fill_alpha=.6,
                         hover_line_color='black',
                         hover_color=colors[i])
            hover_circle = HoverTool(renderers=[s], tooltips=TOOLTIPS)
            p.add_tools(hover_circle)

        #############################
        #### LEGENDs and STYLE
        ##############################

        p.legend.location = legend_location
        p.legend.orientation = legend_orientation

        p.title.text = title_text

        p = USAID_style(p, font=font)

        # basic formatting of the chart.
        p.axis.axis_label = None
        p.axis.visible = False
        p.grid.grid_line_color = None

    except ValueError:

        # placeholder plot at the (already scaled) dimensions
        p = empty_plot(scaled_dim, title='Something', scale=scale, scale_text=1)

        # hide the placeholder title
        p.title.text_color = 'white'

        # print details if asked.
        if print_details:
            print('The dictionary of values:')
            print(detail_1)

    return p
Exemple #10
0
def gen_line(data,
             obs_sel,
             focus_sel,
             obs_labels,
             year_tuple=(2010, 2018),
             title_text='Figure 3: Comparison of multiple countries ³',
             obs_var='country_id',
             year_var='year',
             focus_var='series_id',
             value_var='value_start',
             fill_alpha=.5,
             scale=1,
             line_width=5,
             dots=True,
             legend_outside=True,
             legend_location='above',
             zero_line=False,
             x_axis_label='X-Axis',
             y_axis_label='Y-Axis',
             no_tools=False,
             print_details=False,
             font='Gill Sans MT',
             legend_orientation='horizontal',
             plot_dim=(300, 500)):
    """Draw one line per observation for a single indicator over time.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format data; it is filtered with ``select_data`` using
        ``obs_sel``, ``year_tuple`` and ``focus_sel``.
    obs_sel : sequence
        Observations (values of ``obs_var``) to plot; at least one.
    focus_sel : sequence
        Exactly one indicator (value of ``focus_var``).
    obs_labels : sequence of str
        Legend labels, parallel to ``obs_sel``.
    year_tuple : tuple
        Year selection forwarded to ``select_data``.
    title_text : str
        Plot title.
    obs_var, year_var, focus_var, value_var : str
        Column names.
    fill_alpha : float
        Alpha of the lines when not hovered.
    scale : number
        Multiplier for plot dimensions and line widths.
    line_width : number
        Line width before scaling; marker size is twice the scaled width.
    dots : bool
        When truthy, draw a marker on every data point with a hover tooltip;
        otherwise only a label tooltip is attached.
    legend_outside, legend_location, legend_orientation
        Legend placement.
    zero_line : bool
        When truthy, add a dashed horizontal line at zero.
    x_axis_label, y_axis_label : str
        Accepted for interface compatibility; not used by this function.
    no_tools : bool
        When truthy, hide the toolbar.
    print_details : bool
        When truthy, print the per-point data behind the plot.
    font : str
        Font handed to ``USAID_style``.
    plot_dim : tuple
        ``(height, width)`` before scaling.

    Returns
    -------
    The styled Bokeh figure, or the "No Data" figure from ``empty_plot`` when
    no observation has more than one row.

    Raises
    ------
    ValueError
        If ``obs_sel`` is empty or ``focus_sel`` does not hold exactly one
        indicator.
    """
    # `and`, not `&`: the bitwise operator binds tighter than the
    # comparisons, which let several focus variables slip through. The
    # invalid case used to print this message and then fail on an unbound
    # `p`; raise a clear error instead.
    if not (len(obs_sel) >= 1 and len(focus_sel) == 1):
        raise ValueError(
            'This function does not support multiple country and indicator selections. Please revisit the obs_sel and focus_sel selections and var_names.'
        )

    data = select_data(data,
                       obs=(obs_var, obs_sel),
                       years=(year_var, year_tuple),
                       focus_vars=(focus_var, focus_sel))

    ##### prep the data in the correct order.
    # Replace every observation id by its position in obs_sel. A single
    # mapping is used because renaming in place, one id at a time, collides
    # when the ids themselves are small integers.
    position = {}
    for idx, key in enumerate(obs_sel):
        position.setdefault(key, idx)
    data = data[data[obs_var].isin(list(position))].copy()
    data[obs_var] = data[obs_var].map(position)
    data.sort_values([obs_var, year_var], inplace=True)

    obs_sel = list(range(len(obs_sel)))

    ################# set styles

    # colour per observation, taken from the module-level palette in order
    palette_names = list(palette.keys())
    line_colors = {idx: palette[palette_names[idx]] for idx in obs_sel}

    line_rows = []      # one row per observation (whole line)
    point_frames = []   # one row per data point (markers)

    for i in data[obs_var].unique():
        # rows of this observation for the chosen indicator, by year
        df_small = data[data[obs_var] == i].sort_values([year_var])
        df_small = df_small[df_small[focus_var] == focus_sel[0]].copy()

        # wrap the arrays in lists so each line is a single row
        df1 = pd.DataFrame()
        df1[obs_var] = [i]
        df1['x'] = [df_small[year_var].values]
        df1['y'] = [df_small[value_var].values]
        df1['color'] = line_colors[i]
        # index by position in obs_sel so labels stay aligned with their
        # observation even when an earlier observation has no rows
        df1['label'] = [obs_labels[int(i)]]
        line_rows.append(df1)

        df_small['color'] = line_colors[i]
        point_frames.append(df_small)

    df = pd.concat(line_rows) if line_rows else pd.DataFrame()
    circles = pd.concat(point_frames) if point_frames else pd.DataFrame()

    detail_1 = circles

    # largest number of rows held by any observation (0 when nothing is left)
    counts = data[obs_var].value_counts()
    length = int(counts.iloc[0]) if len(counts) else 0

    #### scale plot
    plot_dim = (plot_dim[0] * scale, plot_dim[1] * scale)

    # only if there is a line for at least one of the observations
    if length > 1:

        line_width = line_width * scale

        source = ColumnDataSource(df)

        p = figure(plot_height=plot_dim[0], plot_width=plot_dim[1])

        ############ add horizontal line at zero if true
        if zero_line:
            zero_span = Span(location=0,
                             dimension='width',
                             line_color='gray',
                             line_dash='dashed',
                             line_width=3 * scale,
                             line_alpha=0.5)
            p.add_layout(zero_span)

        p.multi_line(xs='x',
                     ys='y',
                     legend='label',
                     line_width=line_width,
                     line_color='color',
                     line_alpha=fill_alpha,
                     hover_line_color='color',
                     hover_line_alpha=1.0,
                     source=source)

        ### markers: use fixed column names so the tooltip fields resolve
        circles = circles.rename(index=str,
                                 columns={
                                     year_var: "year",
                                     value_var: "value_start"
                                 })
        circles = ColumnDataSource(circles)

        if dots:
            r = p.circle(y='value_start',
                         x='year',
                         source=circles,
                         color='color',
                         size=2 * line_width)

            p.add_tools(
                HoverTool(show_arrow=False,
                          line_policy='next',
                          renderers=[r],
                          tooltips=[('Obs', '@country_name'),
                                    ('X', '@year'),
                                    ('Y', '@value_start')]))
        else:
            p.add_tools(
                HoverTool(show_arrow=False,
                          line_policy='next',
                          tooltips=[
                              ('Obs', '@label'),
                          ]))

        # give details
        if print_details:
            print('The dictionary of values')
            print(detail_1)

        ######################
        ### styling options
        ######################

        # legend
        p.legend.glyph_width = 60
        p.legend.border_line_color = None

        p = USAID_style(p, font=font)

        p.title.text = title_text
        p.legend.background_fill_color = 'white'

        ######### legend location
        p.legend.orientation = legend_orientation
        if legend_outside:
            # detach the legend from the plot area and re-add it as a layout
            p.legend.location = 'center'
            new_legend = p.legend[0]
            p.legend[0].plot = None
            p.add_layout(new_legend, legend_location)
            p.legend.border_line_color = None
        else:
            p.legend.location = legend_location

        ######## drop tools if prompted
        if no_tools:
            p.toolbar.logo = None
            p.toolbar_location = None

    ### if no data
    else:

        # placeholder plot at the scaled dimensions
        p = empty_plot(plot_dim,
                       title='Something',
                       scale=scale,
                       scale_text=1)

        # hide the placeholder title
        p.title.text_color = 'white'

        # print details if asked.
        if print_details:
            print('The dictionary of values:')
            print(detail_1)

    return p