예제 #1
0
def create_chart_2(df):
    """Build a categorical bar chart of summed amounts per account type.

    Groups ``df`` by its ``acct_type`` column, sums the other columns, and
    draws one vertical bar per account type whose height is the summed
    ``actual_amount``.

    Args:
        df: DataFrame containing at least the ``acct_type`` and
            ``actual_amount`` columns.

    Returns:
        The configured Bokeh figure; the caller decides how to show or
        embed it.
    """
    grouped = df.groupby('acct_type').sum()
    source = ColumnDataSource(grouped)
    # The group keys are read back from the data source so the x-range
    # factors are exactly the values the glyph will reference.
    cats = source.data['acct_type'].tolist()
    p = figure(x_range=cats, plot_width=500)

    # One palette colour per account type.
    color_map = factor_cmap(field_name='acct_type',
                            palette=Spectral5,
                            factors=cats)
    p.vbar(x='acct_type',
           top='actual_amount',
           source=source,
           width=0.70,
           color=color_map)
    p.title.text = 'Spend by Account'
    p.xaxis.axis_label = 'Account'
    p.yaxis.axis_label = 'Actual amounts'
    p.xaxis.major_label_orientation = 'vertical'

    # 'vline' mode: the tooltip is triggered along the vertical line through
    # the cursor rather than only when hovering the bar itself.
    hover = HoverTool()
    hover.tooltips = [("Totals", "@actual_amount")]
    hover.mode = 'vline'
    p.add_tools(hover)
    return p
예제 #2
0
    def bar_charts(self):
        """Show a bar chart of munitions dropped per attacking country.

        Reads ``test.csv``, sums the tonnage columns per
        ``COUNTRY_FLYING_MISSION``, divides the totals by 1000 (kilotons,
        per the y-axis label) and opens a categorical bar chart written to
        ``munitions_by_country.html``.
        """
        output_file('munitions_by_country.html')
        data = pd.read_csv('test.csv')

        # Collapse the individual mission records into one row per attacking
        # country. groupby() picks the grouping column, the list indexer
        # restricts the aggregation to the tonnage columns, and sum() is the
        # aggregation (count, mean, max and min are alternatives).
        # The columns are selected with a list: indexing a GroupBy with a
        # bare tuple of names is rejected by current pandas.
        tonnage_columns = ['TOTAL_TONS', 'TONS_HE', 'TONS_IC', 'TONS_FRAG']
        grouped = data.groupby('COUNTRY_FLYING_MISSION')[tonnage_columns].sum()
        print(grouped)

        # To plot this data, convert to kilotons by dividing by 1000.
        grouped = grouped / 1000

        # The x-axis lists countries rather than numbers, so the figure is
        # given the list of country names as its x_range; a list of strings
        # makes the axis categorical and fixes its possible values.
        source = ColumnDataSource(grouped)
        countries = source.data['COUNTRY_FLYING_MISSION'].tolist()
        p = figure(x_range=countries)

        # factor_cmap builds a colour map assigning one colour to each
        # category (what Bokeh calls a factor); it is passed as the glyph
        # colour below.
        color_map = factor_cmap(field_name='COUNTRY_FLYING_MISSION',
                                palette=Spectral5, factors=countries)

        # vbar takes a `top` column instead of `y`; `bottom` is left at its
        # default of 0.
        p.vbar(x='COUNTRY_FLYING_MISSION', top='TOTAL_TONS', source=source,
               width=0.70, color=color_map)

        p.title.text = 'Munitions Dropped by Allied Country'
        p.xaxis.axis_label = 'Country'
        p.yaxis.axis_label = 'Kilotons of Munitions'

        hover = HoverTool()
        hover.tooltips = [
            ("Totals", "@TONS_HE High Explosive / @TONS_IC Incendiary / @TONS_FRAG Fragmentation")]
        # vline / hline make the popup appear when a vertical or horizontal
        # line through the cursor crosses a glyph. With vline, the popup
        # shows whenever the mouse is anywhere above or below a bar.
        hover.mode = 'vline'
        p.add_tools(hover)
        show(p)
예제 #3
0
def job_num_salary_plot():
    """Render the "top 10 cities" chart for the job category in the form.

    Reads ``category`` from the submitted form, filters the pickled
    job/salary table to that title, keeps the ten rows with the highest
    ``job num`` and saves a two-axis bar chart (salary on the left axis,
    job count on the right) to ``./templates/job_top10.html``.

    Returns:
        The rendered ``job_num_salary_plot.html`` template.
    """
    category = request.form['category']
    app.vars['category'] = category

    # NOTE(review): dill.load can execute code embedded in the file; this
    # must only ever point at a trusted, locally produced pickle.
    with open('job_salary_num_1120.pkl', 'rb') as pickle_file:
        job_salary_num = dill.load(pickle_file)

    # .copy() so the 'sal' column is added to an independent frame instead
    # of a slice of the loaded table.
    df = job_salary_num[job_salary_num['title'] == category].copy()
    # 'salary' holds strings such as "$85,000"; strip the formatting so the
    # bar heights are numeric.
    df['sal'] = df['salary'].apply(
        lambda x: int(x.replace('$', '').replace(',', '')))
    df2 = df.sort_values(by='job num', ascending=False)[:10]

    cities = df2.Loc.values
    output_file('./templates/job_top10.html', title="Top10 cities")
    # Upper bound: highest salary rounded down to a whole thousand, plus 5%.
    ymax = int(df2['sal'].max()) // 1000 * 1050
    p = figure(x_range=cities,
               y_range=(60000, ymax),
               plot_width=1200,
               plot_height=350,
               title=('Top 10 cities with most job opportunities for ' +
                      category),
               toolbar_location=None,
               tools='')
    p.yaxis[0].formatter = NumeralTickFormatter(format="$0,0")
    # The two bar series are dodged left/right so they sit side by side
    # within each city's slot.
    p.vbar(x=dodge('Loc', -0.1, range=p.x_range),
           top='sal',
           width=0.2,
           source=df2,
           line_color='white',
           legend_label="Salary",
           color='green')

    # Job counts use their own right-hand axis, scaled from the largest count.
    y2max = (int(df2['job num'].max()) // 100) * 150
    p.extra_y_ranges = {'Job Number': Range1d(start=0, end=y2max)}
    p.add_layout(LinearAxis(y_range_name="Job Number"), 'right')
    p.vbar(x=dodge('Loc', 0.1, range=p.x_range),
           top='job num',
           width=0.2,
           source=df2,
           line_color='white',
           legend_label="Job number",
           color='orange',
           y_range_name='Job Number')
    p.xgrid.grid_line_color = None
    p.legend.location = "top_right"
    p.legend.orientation = "horizontal"

    hover = HoverTool()
    hover.tooltips = [('Title', '@title'), ('Average salary', '@salary'),
                      ('Job number', '@{job num}')]
    hover.mode = 'vline'
    p.add_tools(hover)
    save(p)
    return render_template('job_num_salary_plot.html', title=category)
예제 #4
0
def plot_LogsOvertime(df,
                      col1,
                      col2,
                      title,
                      x_label,
                      y_label,
                      tooltips=(('count', '@count'), )):
    """Show an interactive Bokeh line plot of ``col2`` summed per ``col1``.

    Args:
        df: Source DataFrame.
        col1: Column to group by; plotted on the datetime x-axis.
        col2: Column whose per-group sum is plotted on the y-axis.
        title: Figure title.
        x_label: Label for the x-axis.
        y_label: Label for the y-axis.
        tooltips: Hover tooltips, passed to ``HoverTool.tooltips``. A list or
            tuple of ``(label, value)`` pairs is copied before use; any
            other value (for example an HTML template string or ``None``)
            is passed through unchanged.
    """
    grouped = df.groupby([col1])[col2].sum().reset_index()
    source = ColumnDataSource(grouped)

    # initialize the figure
    p = figure(title=title,
               plot_width=1000,
               plot_height=450,
               x_axis_type='datetime')

    # create the plot
    p.line(x=col1, y=col2, line_width=3, source=source)

    # set formatting parameters
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    p.title.text_font_size = "16pt"
    p.title.text_color = 'MidnightBlue'
    p.xaxis.axis_label_text_font_size = '15pt'
    p.yaxis.axis_label_text_font_size = '15pt'
    p.yaxis.axis_label = y_label
    p.xaxis.axis_label = x_label
    p.xaxis.major_label_text_font_size = '12pt'

    # add interactive hover tool
    hover = HoverTool()
    # Copy sequence tooltips so neither the shared default nor a caller's
    # list is handed to the tool by reference.
    if isinstance(tooltips, (list, tuple)):
        hover.tooltips = list(tooltips)
    else:
        hover.tooltips = tooltips

    hover.mode = 'vline'
    p.add_tools(hover)

    # display plot
    show(p)
예제 #5
0
def plot_stacked_bar_margin(df, cols, tooltips, couv_cor):
    """Show a stacked bar chart of per-graph column shares.

    For every ``num_graph_G_k`` group the mean of each column in ``cols`` is
    taken, each mean is divided by the row total and rounded to two
    decimals, and the shares are stacked into one bar per graph. The chart
    is written to ``state_node_clique_covers_dashboard.html`` and shown.

    Args:
        df: DataFrame holding ``num_graph_G_k`` and the columns in ``cols``.
        cols: Columns to average and stack.
        tooltips: Value assigned to the hover tool's ``tooltips``.
        couv_cor: Text appended to the chart title.
    """
    # output to static HTML file
    output_file("state_node_clique_covers_dashboard.html")
    toolbar = "pan,wheel_zoom,box_zoom,reset,save,box_select,lasso_select"

    means = df.groupby('num_graph_G_k')[cols].mean()
    means['sum'] = means.sum(axis=1)

    # Row-wise shares of the total; the 'sum' column itself becomes 1 here
    # and is multiplied back by the row total just below.
    shares = means.div(means['sum'], axis=0).round(2)
    shares['sum'] = shares['sum'] * means['sum']

    source = ColumnDataSource(shares)
    graphs = source.data['num_graph_G_k'].tolist()

    p = figure(
        x_range=graphs,
        tools=toolbar,
        plot_height=HEIGHT,
        plot_width=WIDTH,
    )

    # NOTE(review): the legend entries are fixed at five names regardless of
    # how many columns `cols` contains — confirm callers always pass five.
    p.vbar_stack(
        x='num_graph_G_k',
        stackers=cols,
        source=source,
        legend=['etat0', 'etat1', 'etat_1', 'etat2', 'etat3'],
        width=0.50,
        color=Spectral5,
    )

    p.title.text = 'etats des graphes ' + couv_cor
    p.xaxis.axis_label = 'graphs'
    p.yaxis.axis_label = ''
    p.xaxis.major_label_orientation = 1

    hover = HoverTool(tooltips=tooltips, mode='vline')
    p.add_tools(hover)

    show(p)
예제 #6
0
파일: Figures.py 프로젝트: STIPlab/R2r
def Fig5(parea, themes):
    """Write Figure 5: policy initiatives reported per country.

    Loads the STIP Compass flat CSV, keeps the initiatives flagged under any
    of ``themes``, counts them per ``CoutryLabel`` and saves a horizontal bar
    chart to ``templates/figures/<parea>/Figure5.html``. Hovering a bar shows
    the count and the ten most frequent keywords; tapping a bar triggers an
    ``OpenURL`` callback for the policy area's theme page.

    Args:
        parea: Policy area label; used in the output path, the chart title
            and to pick the theme code for the tap URL.
        themes: Names of the theme flag columns in the CSV to include.
    """
    writeTo = "templates/figures/" + parea + "/Figure5.html"

    # NOTE: mode="inline" lets the output run from a local file. In theory
    # it can be removed for an online file.
    output_file(writeTo, mode="inline")

    # The data is pipe-separated; the second header row is skipped because
    # multi-indexing causes problems in the filtering.
    # Online source: https://stip.oecd.org/assets/downloads/STIP_Survey.csv
    url = 'INPUT/STIP-Data-Flatcsv-Apr2020.csv'

    compass_df = pd.read_csv(url, sep='|', skiprows=[1])
    d = ['EGY', 'IDN', 'IND', 'MAR', 'MYS', 'SAU', 'SGL', 'SRB', 'URY', 'VNM']
    # .copy() yields an independent frame, so the assignments below are not
    # writes into a slice of the frame returned by read_csv.
    compass_df = compass_df[~compass_df['CountryCode'].isin(d)].copy()
    # Assign the filled column back: an in-place fillna on a selected column
    # is not guaranteed to propagate to the frame, and a remaining NaN would
    # break the '¬'.join() below.
    compass_df['Tags'] = compass_df['Tags'].fillna("¬")

    Fig5_df = pd.DataFrame(columns=compass_df.columns)

    for th in themes:
        # Normalise string "1" flags to the integer 1 before filtering.
        compass_df.loc[compass_df[th] == "1", th] = 1
        Fig5_df = pd.concat([Fig5_df, compass_df[compass_df[th] == 1]])

    # An initiative flagged under several themes must be counted once.
    Fig5_df.drop_duplicates(subset="InitiativeID", inplace=True)
    Fig5_df['count'] = 1

    # 'CoutryLabel' (sic) is the column name used consistently here and in
    # ccodes.csv; it is a runtime key and must not be "corrected".
    grouped = Fig5_df.groupby('CoutryLabel')[['count',
                                              'HasBeenEvaluated']].sum()

    # Per country: join all tag strings with '¬', split on '¬' and count
    # how often each keyword occurs.
    groupedkw = Fig5_df.groupby(
        'CoutryLabel')['Tags'].apply(lambda x: nltk.FreqDist(
            nltk.tokenize.regexp_tokenize('¬'.join(x), pattern='¬', gaps=True))
                                     )

    # Keep the ten most frequent keywords per country and collapse them into
    # one stringified list per country for the tooltip.
    kwlist = groupedkw.groupby(level='CoutryLabel').nlargest(10).reset_index(
        level=0, drop=True).to_frame()
    kwlist.reset_index(level=1, inplace=True)
    kwlist.rename(columns={"level_1": "topconcepts"}, inplace=True)
    kwlist_merged = kwlist.groupby('CoutryLabel')['topconcepts'].apply(
        list).to_frame()
    kwlist_merged = kwlist_merged.topconcepts.apply(str).to_frame()

    grouped = pd.concat([grouped, kwlist_merged], axis=1, sort=True)

    ccodes_df = pd.read_csv('INPUT/ccodes.csv', index_col='CoutryLabel')
    grouped = grouped.join(ccodes_df)

    # Ascending sort places the largest count at the top of the y-axis.
    grouped.sort_values(by='count', ascending=True, inplace=True)

    source = ColumnDataSource(grouped)

    countries = source.data['CoutryLabel'].tolist()

    p = figure(plot_width=800,
               plot_height=1200,
               y_range=countries,
               tools="tap,pan,wheel_zoom,box_zoom,save,reset")

    p.hbar(name="myHM",
           y='CoutryLabel',
           right='count',
           left=0,
           source=source,
           height=0.50,
           color='#4292c6')

    title = "Figure 5. Policy initiatives reported under the \"" + parea + "\" policy area"
    p.title.text = title
    p.yaxis.axis_label = 'Country or other reporting entity'

    # A second x-axis is added above the plot and carries the axis label.
    p.add_layout(
        LinearAxis(axis_label='Number of policy initiatives reported'),
        'above')
    p.title.align = 'right'
    p.title.vertical_align = 'top'
    p.title.text_font_size = '11pt'

    p.xaxis.axis_label_text_font_size = "11pt"
    p.xaxis.axis_label_text_font_style = "normal"
    p.xaxis.major_label_text_font_size = "10pt"

    p.yaxis.axis_label_text_font_size = "12pt"
    p.yaxis.axis_label_text_font_style = "normal"
    p.yaxis.major_label_text_font_size = "10pt"

    hover = HoverTool()
    hover.tooltips = """
    <font color="#3eade0">Initiatives:</font> @count <br>
    <font color="#3eade0">Frequent keywords:</font> @topconcepts <br>
    <span style="font-weight: bold;">Click to browse initiatives in STIP Compass</span>
    
    """
    hover.mode = 'hline'
    p.add_tools(hover)

    # Prevent selection on click action to be highlighted: unselected bars
    # are drawn exactly like the normal ones.
    renderer = p.select(name="myHM")
    renderer.nonselection_glyph = HBar(height=0.50,
                                       fill_color='#4292c6',
                                       line_color='#4292c6')

    # Theme code used in the tap URL; unlisted policy areas fall back to TH84.
    theme_codes = {
        "Governance": "TH1",
        "Public research system": "TH2",
        "Innovation in firms and innovative entrepreneurship": "TH3",
        "Science-industry knowledge transfer and sharing": "TH5",
        "Human resources for research and innovation": "TH7",
        "Research and innovation for society": "TH8",
    }
    alink = theme_codes.get(parea, "TH84")

    compass = "******" + "/themes/" + alink

    taptool = p.select(type=TapTool)
    taptool.callback = OpenURL(url=compass)

    save(p)
예제 #7
0
파일: Figures.py 프로젝트: STIPlab/R2r
def Fig4(parea, themes):
    """Write Figure 4: policy initiatives reported per yearly budget range.

    Loads the STIP Compass flat CSV, keeps the initiatives flagged under any
    of ``themes``, counts them per ``YearlyBudgetRange`` and saves a
    horizontal bar chart to ``templates/figures/<parea>/Figure4.html``.
    Hovering a bar shows the count and the ten most frequent keywords;
    tapping a bar downloads the matching initiatives as CSV from the STIP
    API via a JavaScript callback.

    Args:
        parea: Policy area label; used in the output path, the chart title
            and to pick the theme code for the download URL.
        themes: Names of the theme flag columns in the CSV to include.
    """
    writeTo = "templates/figures/" + parea + "/Figure4.html"

    # NOTE: mode="inline" lets the output run from a local file. In theory
    # it can be removed for an online file.
    output_file(writeTo, mode="inline")

    # The data is pipe-separated; the second header row is skipped because
    # multi-indexing causes problems in the filtering.
    # Online source: https://stip.oecd.org/assets/downloads/STIP_Survey.csv
    url = 'INPUT/STIP-Data-Flatcsv-Apr2020.csv'

    # The first data row (the second header) is read on its own and
    # transposed so it can be looked up by theme column name.
    head_df = pd.read_csv(url, sep='|', nrows=1)
    themes_df = head_df[themes]
    themes_df = themes_df.T

    compass_df = pd.read_csv(url, sep='|', skiprows=[1])
    d = ['EGY', 'IDN', 'IND', 'MAR', 'MYS', 'SAU', 'SGL', 'SRB', 'URY', 'VNM']
    # .copy() yields an independent frame, so the assignments below are not
    # writes into a slice of the frame returned by read_csv.
    compass_df = compass_df[~compass_df['CountryCode'].isin(d)].copy()
    # Assign the filled column back: an in-place fillna on a selected column
    # is not guaranteed to propagate to the frame, and a remaining NaN would
    # break the '¬'.join() below.
    compass_df['Tags'] = compass_df['Tags'].fillna("¬")

    compass_df['theme'] = ""
    Fig4_df = pd.DataFrame(columns=compass_df.columns)

    for th in themes:
        # Normalise string "1" flags to the integer 1 before filtering.
        compass_df.loc[compass_df[th] == "1", th] = 1
        compass_df.loc[compass_df[th] == 1,
                       'theme'] = themes_df.loc[th].values[0]
        Fig4_df = pd.concat([Fig4_df, compass_df[compass_df[th] == 1]])

    # Count each (initiative, budget range) pair once even when the
    # initiative is flagged under several themes.
    Fig4_df.drop_duplicates(subset=['InitiativeID', 'YearlyBudgetRange'],
                            inplace=True)
    Fig4_df['count'] = 1

    grouped = Fig4_df.groupby('YearlyBudgetRange')[[
        'count', 'HasBeenEvaluated'
    ]].sum()

    # Per budget range: join all tag strings with '¬', split on '¬' and
    # count how often each keyword occurs.
    groupedkw = Fig4_df.groupby(
        'YearlyBudgetRange')['Tags'].apply(lambda x: nltk.FreqDist(
            nltk.tokenize.regexp_tokenize('¬'.join(x), pattern='¬', gaps=True))
                                           )

    # Keep the ten most frequent keywords per budget range and collapse them
    # into one stringified list per range for the tooltip.
    kwlist = groupedkw.groupby(
        level='YearlyBudgetRange').nlargest(10).reset_index(
            level=0, drop=True).to_frame()
    kwlist.reset_index(level=1, inplace=True)
    kwlist.rename(columns={"level_1": "topconcepts"}, inplace=True)
    kwlist_merged = kwlist.groupby('YearlyBudgetRange')['topconcepts'].apply(
        list).to_frame()
    kwlist_merged = kwlist_merged.topconcepts.apply(str).to_frame()

    grouped = pd.concat([grouped, kwlist_merged], axis=1, sort=True)

    # Ascending sort places the largest count at the top of the y-axis.
    grouped.sort_values(by='count', ascending=True, inplace=True)

    # Budget range label -> code used in the API's "br-extra" parameter.
    budgets_index = [
        'More than 500M', '100M-500M', '50M-100M', '20M-50M', '5M-20M',
        '1M-5M', 'Less than 1M', 'Not applicable', "Don't know"
    ]
    budgets_stip = {
        'budgets_links':
        ['BR15', 'BR14', 'BR13', 'BR12', 'BR11', 'BR10', 'BR9', 'BR16', 'BR1']
    }
    budgets_links_df = pd.DataFrame(data=budgets_stip, index=budgets_index)

    grouped = grouped.join(budgets_links_df)

    # Theme code used in the download URL; unlisted policy areas fall back
    # to TH84.
    theme_codes = {
        "Governance": "TH1",
        "Public research system": "TH2",
        "Innovation in firms and innovative entrepreneurship": "TH3",
        "Science-industry knowledge transfer and sharing": "TH5",
        "Human resources for research and innovation": "TH7",
        "Research and innovation for society": "TH8",
    }
    alink = theme_codes.get(parea, "TH84")

    # One download URL per bar; the JS callback reads it from this column.
    grouped[
        'links'] = "https://stip.oecd.org/ws/STIP/API/getPolicyInitiatives.xqy?format=csv&th=" + str(
            alink) + "&br-extra=" + grouped['budgets_links'].map(str)

    source = ColumnDataSource(grouped)

    budgets = source.data['YearlyBudgetRange'].tolist()

    p = figure(plot_width=800,
               plot_height=400,
               y_range=budgets,
               tools="tap,pan,wheel_zoom,box_zoom,save,reset")

    p.hbar(name="myHM",
           y='YearlyBudgetRange',
           right='count',
           left=0,
           source=source,
           height=0.50,
           color='#4292c6')

    title = "Figure 4. Policies reported by budget range, \"" + parea + "\" policy area"
    p.title.text = title
    p.yaxis.axis_label = 'Budget range (in EUR)'
    # A second x-axis is added above the plot and carries the axis label.
    p.add_layout(
        LinearAxis(axis_label='Number of policy initiatives reported'),
        'above')
    p.title.align = 'right'
    p.title.vertical_align = 'top'
    p.title.text_font_size = '11pt'

    p.xaxis.axis_label_text_font_size = "11pt"
    p.xaxis.axis_label_text_font_style = "normal"
    p.xaxis.major_label_text_font_size = "10pt"

    p.yaxis.axis_label_text_font_size = "12pt"
    p.yaxis.axis_label_text_font_style = "normal"
    p.yaxis.major_label_text_font_size = "10pt"

    hover = HoverTool()
    hover.tooltips = """
    <font color="#3eade0">Initiatives:</font> @count <br>
    <font color="#3eade0">Frequent keywords:</font> @topconcepts <br>
    <span style="font-weight: bold;">Click to download data</span>
    """

    hover.mode = 'hline'
    p.add_tools(hover)

    # Prevent selection on click action to be highlighted: unselected bars
    # are drawn exactly like the normal ones.
    renderer = p.select(name="myHM")
    renderer.nonselection_glyph = HBar(height=0.50,
                                       fill_color='#4292c6',
                                       line_color='#4292c6')

    # Tap handler: fetch the CSV for the selected bar and hand it to the
    # browser as a download, using the title as a temporary status line.
    callback = CustomJS(args={
        'source': source,
        'title': p.title
    },
                        code="""
        var idx = source.selected.indices
        // A tap on empty space selects nothing; there is no URL to fetch.
        if (idx.length === 0) {
            return
        }
        var url = source.data['links'][idx[0]]
        var temptext = title.text
        var tempcolor = title.text_color
        title.text = "Download in progress- this may take up to one minute."
        title.text_color = "red"
        fetch(url, {
              method: 'GET',
            }).then(function(resp) {
              return resp.blob();
            }).then(function(blob) {
              const newBlob = new Blob([blob], { type: "text/csv", charset: "UTF-8" })
        
              // IE doesn't allow using a blob object directly as link href
              // instead it is necessary to use msSaveOrOpenBlob
              if (window.navigator && window.navigator.msSaveOrOpenBlob) {
                window.navigator.msSaveOrOpenBlob(newBlob);
                return;
              }
              const data = window.URL.createObjectURL(newBlob);
              const link = document.createElement('a');
              link.dataType = "json";
              link.href = data;
              link.download = "STIP_COMPASS_Policy_Initiatives_Export.csv";
              link.dispatchEvent(new MouseEvent('click'));
              setTimeout(function () {
                // For Firefox it is necessary to delay revoking the ObjectURL
                window.URL.revokeObjectURL(data);
              }, 60);
            });
        
        setTimeout(function (){
    
            title.text = temptext
            title.text_color = tempcolor
    
        }, 7000)
    """)

    # Add click action
    p.js_on_event(Tap, callback)

    save(p)
예제 #8
0
파일: Figures.py 프로젝트: STIPlab/R2r
def Fig3(parea, themes):
    """Write Figure 3: instances reported per policy instrument type.

    Loads the STIP Compass flat CSV, keeps the initiatives flagged under any
    of ``themes``, counts them per ``InstrumentTypeLabel`` and saves a
    horizontal bar chart to ``templates/figures/<parea>/Figure3.html``.
    Hovering a bar shows the count and the ten most frequent keywords;
    tapping a bar downloads the matching initiatives as CSV from the STIP
    API via a JavaScript callback.

    Args:
        parea: Policy area label; used in the output path, the chart title
            and to pick the theme code for the download URL.
        themes: Names of the theme flag columns in the CSV to include.
    """
    writeTo = "templates/figures/" + parea + "/Figure3.html"

    # NOTE: mode="inline" lets the output run from a local file. In theory
    # it can be removed for an online file.
    output_file(writeTo, mode="inline")

    # The data is pipe-separated; the second header row is skipped because
    # multi-indexing causes problems in the filtering.
    # Online source: https://stip.oecd.org/assets/downloads/STIP_Survey.csv
    url = 'INPUT/STIP-Data-Flatcsv-Apr2020.csv'

    # The first data row (the second header) is read on its own and
    # transposed so it can be looked up by theme column name.
    head_df = pd.read_csv(url, sep='|', nrows=1)
    themes_df = head_df[themes]
    themes_df = themes_df.T

    compass_df = pd.read_csv(url, sep='|', skiprows=[1])
    d = ['EGY', 'IDN', 'IND', 'MAR', 'MYS', 'SAU', 'SGL', 'SRB', 'URY', 'VNM']
    # .copy() yields an independent frame, so the assignments below are not
    # writes into a slice of the frame returned by read_csv.
    compass_df = compass_df[~compass_df['CountryCode'].isin(d)].copy()
    # Assign the filled column back: an in-place fillna on a selected column
    # is not guaranteed to propagate to the frame, and a remaining NaN would
    # break the '¬'.join() below.
    compass_df['Tags'] = compass_df['Tags'].fillna("¬")

    compass_df['theme'] = ""
    Fig3_df = pd.DataFrame(columns=compass_df.columns)

    for th in themes:
        # Normalise string "1" flags to the integer 1 before filtering.
        compass_df.loc[compass_df[th] == "1", th] = 1
        compass_df.loc[compass_df[th] == 1,
                       'theme'] = themes_df.loc[th].values[0]
        Fig3_df = pd.concat([Fig3_df, compass_df[compass_df[th] == 1]])

    # Count each (initiative, instrument type) pair once even when the
    # initiative is flagged under several themes.
    Fig3_df.drop_duplicates(subset=['InitiativeID', 'InstrumentTypeLabel'],
                            inplace=True)
    Fig3_df['count'] = 1

    grouped = Fig3_df.groupby('InstrumentTypeLabel')[[
        'count', 'HasBeenEvaluated'
    ]].sum()

    # Per instrument type: join all tag strings with '¬', split on '¬' and
    # count how often each keyword occurs.
    groupedkw = Fig3_df.groupby(
        'InstrumentTypeLabel')['Tags'].apply(lambda x: nltk.FreqDist(
            nltk.tokenize.regexp_tokenize('¬'.join(x), pattern='¬', gaps=True))
                                             )

    # Keep the ten most frequent keywords per instrument type and collapse
    # them into one stringified list per type for the tooltip.
    kwlist = groupedkw.groupby(
        level='InstrumentTypeLabel').nlargest(10).reset_index(
            level=0, drop=True).to_frame()
    kwlist.reset_index(level=1, inplace=True)
    kwlist.rename(columns={"level_1": "topconcepts"}, inplace=True)
    kwlist_merged = kwlist.groupby('InstrumentTypeLabel')['topconcepts'].apply(
        list).to_frame()
    kwlist_merged = kwlist_merged.topconcepts.apply(str).to_frame()

    grouped = pd.concat([grouped, kwlist_merged], axis=1, sort=True)

    # Ascending sort places the largest count at the top of the y-axis.
    grouped.sort_values(by='count', ascending=True, inplace=True)

    # Instrument label (as it appears in the data) -> identifier used in the
    # API's "pi" parameter. The two lists are paired by position.
    inst_index = [
        'Centres of excellence grants',
        'Corporate tax relief for R&D and innovation',
        'Creation or reform of governance structure or public body',
        'Debt guarantees and risk sharing schemes',
        'Dedicated support to research infrastructures',
        'Emerging technology regulation', 'Equity financing',
        'Fellowships and postgraduate loans and scholarships',
        'Formal consultation of stakeholders or experts',
        'Grants for business R&D and innovation',
        'Horizontal STI coordination bodies',
        'Information services and access to datasets', 'Innovation vouchers',
        'Institutional funding for public research',
        'Intellectual property regulation and incentives',
        'Labour mobility regulation and incentives',
        'Loans and credits for innovation in firms',
        'National strategies, agendas and plans',
        'Networking and collaborative platforms',
        'Policy intelligence (e.g. evaluations, benchmarking and forecasts)',
        'Procurement programmes for R&D and innovation',
        'Project grants for public research',
        'Public awareness campaigns and other outreach activities',
        'Regulatory oversight and ethical advice bodies',
        'Science and innovation challenges, prizes and awards',
        'Standards and certification for technology development and adoption',
        'Technology extension and business advisory services'
    ]
    inst_stip = {
        'inst_links': [
            'Centres_of_excellence_grants', 'Tax_relief',
            'Creation_or_reform_of_governance_structure_or_public_body',
            'Debt_guarantees_and_risk_sharing_schemes',
            'Dedicated_support_to_new_research_infrastructures',
            'Emerging_technology_regulation', 'Equity_financing',
            'Postgraduate_loans_scholarships_and_fellowships',
            'Public_consultation_of_stakeholders',
            'Project_grants_for_business_RD_and_innovation',
            'Horizontal_STI_coordination_bodies',
            'Information_services_and_databases', 'Innovation_vouchers',
            'Institutional_funding_for_public_research',
            'Intellectual_property_regulation_and_incentives',
            'Labour_mobility_regulation_and_incentives',
            'Loans_and_credits_for_innovation_in_firms',
            'National_strategies_agendas_and_plans',
            'Networking_and_collaborative_platforms', 'Policy_intelligence',
            'Procurement_programmes_for_RD_and_innovation',
            'Project_grants_for_public_research',
            'Public_awareness_campaigns_and_other_outreach_activities',
            'Regulatory_oversight_and_ethical_advice_bodies',
            'Innovation_challenges_prizes_and_awards',
            'Standards_and_certification_for_technology_development_and_adoption',
            'Technology_transfer_and_business_advisory_services'
        ]
    }
    inst_links_df = pd.DataFrame(data=inst_stip, index=inst_index)

    grouped = grouped.join(inst_links_df)

    # Theme code used in the download URL; unlisted policy areas fall back
    # to TH84.
    theme_codes = {
        "Governance": "TH1",
        "Public research system": "TH2",
        "Innovation in firms and innovative entrepreneurship": "TH3",
        "Science-industry knowledge transfer and sharing": "TH5",
        "Human resources for research and innovation": "TH7",
        "Research and innovation for society": "TH8",
    }
    alink = theme_codes.get(parea, "TH84")

    # One download URL per bar; the JS callback reads it from this column.
    grouped[
        'links'] = "https://stip.oecd.org/ws/STIP/API/getPolicyInitiatives.xqy?format=csv&pi=" + grouped[
            'inst_links'].map(str) + "&th=" + str(
                alink) + "&br-extra=none,BR16,BR1&br=BR9,BR15"

    source = ColumnDataSource(grouped)

    instruments = source.data['InstrumentTypeLabel'].tolist()

    p = figure(plot_width=800,
               plot_height=800,
               y_range=instruments,
               tools="tap,pan,wheel_zoom,box_zoom,save,reset")

    p.hbar(name="myHM",
           y='InstrumentTypeLabel',
           right='count',
           left=0,
           source=source,
           height=0.50,
           color='#4292c6')

    title = "Figure 3. Types of instruments reported, \"" + parea + "\" policy area"
    p.title.text = title
    p.yaxis.axis_label = 'Type of policy instrument'
    # A second x-axis is added above the plot and carries the axis label.
    p.add_layout(LinearAxis(axis_label='Number of instances reported'),
                 'above')
    p.title.align = 'right'
    p.title.vertical_align = 'top'
    p.title.text_font_size = '11pt'

    p.xaxis.axis_label_text_font_size = "11pt"
    p.xaxis.axis_label_text_font_style = "normal"
    p.xaxis.major_label_text_font_size = "10pt"

    p.yaxis.axis_label_text_font_size = "12pt"
    p.yaxis.axis_label_text_font_style = "normal"
    p.yaxis.major_label_text_font_size = "10pt"

    hover = HoverTool()
    hover.tooltips = """
    <font color="#3eade0">Instruments:</font> @count <br>
    <font color="#3eade0">Frequent keywords:</font> @topconcepts <br>
    <span style="font-weight: bold;">Click to download data</span>
    """

    hover.mode = 'hline'
    p.add_tools(hover)

    # Prevent selection on click action to be highlighted: unselected bars
    # are drawn exactly like the normal ones.
    renderer = p.select(name="myHM")
    renderer.nonselection_glyph = HBar(height=0.50,
                                       fill_color='#4292c6',
                                       line_color='#4292c6')

    # Tap handler: fetch the CSV for the selected bar and hand it to the
    # browser as a download, using the title as a temporary status line.
    callback = CustomJS(args={
        'source': source,
        'title': p.title
    },
                        code="""
        var idx = source.selected.indices
        // A tap on empty space selects nothing; there is no URL to fetch.
        if (idx.length === 0) {
            return
        }
        var url = source.data['links'][idx[0]]
        var temptext = title.text
        var tempcolor = title.text_color
        title.text = "Download in progress- this may take up to one minute."
        title.text_color = "red"
        fetch(url, {
              method: 'GET',
            }).then(function(resp) {
              return resp.blob();
            }).then(function(blob) {
              const newBlob = new Blob([blob], { type: "text/csv", charset: "UTF-8" })
        
              // IE doesn't allow using a blob object directly as link href
              // instead it is necessary to use msSaveOrOpenBlob
              if (window.navigator && window.navigator.msSaveOrOpenBlob) {
                window.navigator.msSaveOrOpenBlob(newBlob);
                return;
              }
              const data = window.URL.createObjectURL(newBlob);
              const link = document.createElement('a');
              link.dataType = "json";
              link.href = data;
              link.download = "STIP_COMPASS_Policy_Initiatives_Export.csv";
              link.dispatchEvent(new MouseEvent('click'));
              setTimeout(function () {
                // For Firefox it is necessary to delay revoking the ObjectURL
                window.URL.revokeObjectURL(data);
              }, 60);
            });
        
        setTimeout(function (){
    
            title.text = temptext
            title.text_color = tempcolor
    
        }, 7000)
    """)

    # Add click action
    p.js_on_event(Tap, callback)

    save(p)
예제 #9
0
# Bar chart of munitions dropped per country. `grouped` is expected to be
# defined earlier in the script, with COUNTRY_FLYING_MISSION as the group key.
# Dividing by 1000 matches the "Kilotons" y-axis label below.
grouped = grouped / 1000

source = ColumnDataSource(grouped)
countries = source.data['COUNTRY_FLYING_MISSION'].tolist()

# A list of strings as x_range makes the x-axis categorical.
p = figure(x_range=countries)

# One palette colour per country.
color_map = factor_cmap(field_name='COUNTRY_FLYING_MISSION',
                        palette=Spectral5,
                        factors=countries)

p.vbar(x='COUNTRY_FLYING_MISSION',
       top="TOTAL_TONS",
       source=source,
       width=0.5,
       color=color_map)

p.title.text = ' Munitions Dropped by Allied Country'
p.xaxis.axis_label = 'Country'
p.yaxis.axis_label = 'Kilotons of Munitions'

# A single tooltip row that combines three columns of the data source.
hover = HoverTool()
hover.tooltips = [(
    "Totals",
    "@TONS_HE High Explosive / @TONS_IC Incendiary / @TONS_FRAG Fragmentation")
                  ]
hover.mode = 'vline'
p.add_tools(hover)

show(p)
# Scale the aggregated tonnage by 1/1000 so the values match the
# "Kilotons" y-axis label.
grouped = grouped / 1000

source = ColumnDataSource(grouped)
countries = source.data['COUNTRY_FLYING_MISSION'].tolist()

# Passing a list of strings as x_range makes the x-axis categorical, with
# one slot per country.
p = figure(x_range=countries)

# Each factor (category) gets its own bar colour from the palette.
color_map = factor_cmap(
    field_name='COUNTRY_FLYING_MISSION',
    palette=Spectral5,
    factors=countries,
)

p.vbar(
    x='COUNTRY_FLYING_MISSION',
    top='TOTAL_TONS',
    source=source,
    width=0.70,
    color=color_map,
)

p.title.text = "Munitions Dropped by Allied Country"
p.xaxis.axis_label = "Country"
p.yaxis.axis_label = "Kilotons of Munitions"

# One tooltip row showing several data columns; "vline" makes the popup
# appear whenever the vertical line through the cursor crosses a bar.
hover = HoverTool(
    tooltips=[(
        "Totals",
        "@TONS_HE High Explosive / @TONS_IC Incendiary / @TONS_FRAG Fragmentation",
    )],
    mode="vline",
)
p.add_tools(hover)
show(p)
예제 #11
0
        left=0,
        height=0.5,
        color="#FFFF99")

# Cosmetic setup for figure p5 (created before this fragment): centred
# title, no grid lines, hidden x-axis and a thick translucent outline.
p5.title.text = 'Naked Realization by state'
p5.title.align = 'center'
p5.xgrid.grid_line_color = None
p5.ygrid.grid_line_color = None
p5.xaxis.visible = False
p5.outline_line_width = 7
p5.outline_line_alpha = 0.3
p5.outline_line_color = "#E5FFCC"
hover = HoverTool()
# NOTE(review): Bokeh's tooltip syntax for a column name containing spaces
# is "@{column name}"; this reference has no braces and an unbalanced
# parenthesis, so it may not resolve to a column — confirm the exact name.
hover.tooltips = [("Totals", "@Naked Realization(Rs")]

# 'hline': trigger the tooltip along the horizontal line through the cursor.
hover.mode = 'hline'

p5.add_tools(hover)
# Total 'Sales Qty' per (grade, region) pair; the result is indexed by both.
sales_data = df.groupby(['Grade Description',
                         'Region Description'])['Sales Qty'].sum()
print(sales_data)
# Flatten the grouped sums into a plain list, in group order.
data = [row for row in sales_data]
labels = ['OPC43', 'OPC53', 'PPC', 'PSC']
# NOTE(review): takes the first four (grade, region) sums, not one total per
# grade — confirm that this is what the four labels are meant to describe.
values = [data[0], data[1], data[2], data[3]]
# NOTE(review): 'labels' and 'values' below are string literals, not the
# lists defined above, so the source columns hold ['labels', 0] and
# ['values', 2 * pi] — confirm the intended wedge angles.
source = ColumnDataSource(
    data=dict(start=['labels', 0], end=['values', 2 * pi]))
#states = source.data['Grade Description'].tolist()
p6 = figure(plot_width=180, plot_height=180)
p6.wedge(x=0,
         y=0,
         start_angle='start',
예제 #12
0
    def flights(self):
        """Analyse the flights listed in ``fly_mia.csv`` and build five Bokeh charts.

        Steps:
          1. Read the CSV and clean the ``est_arr_time`` / ``dep_time``
             strings (weekday abbreviations are replaced by calendar dates).
          2. Derive ``flight_time`` in minutes and adjust it for two groups
             of origins.
          3. Build the charts: (I) stacked histogram of flight times by
             region, (II) median time between departures per origin,
             (III) share of arrivals by part of the day, (IV) box plot of
             flight times for the ten most frequent origins, (V) scatter of
             flight time against time between departures.

        Only chart V is displayed with ``show``; the ``show`` calls for the
        other charts are commented out.  A few intermediate summaries
        (``pct_time``, ``vaR``, ``ori``, ``s``) are computed but not plotted.

        Returns:
            None.
        """
        a = pd.read_csv("fly_mia.csv", encoding="latin-1")

        # Remove literal '?' characters and any parenthesised text.  The
        # ``regex`` flag is spelled out because the pandas default for
        # ``str.replace`` changed from True to False in pandas 2.0.
        a['est_arr_time'] = a['est_arr_time'].str.replace(
            '?', '', regex=False)
        a['est_arr_time'] = a['est_arr_time'].str.replace(
            r"\(.*\)", "", regex=True)
        a = a[a.est_arr_time.str.contains('0')]

        # Replace weekday abbreviations with the dates they stand for.  Each
        # subset is copied so the column assignments do not write into a
        # view of ``a``.
        day_to_date = (('Sun', '2019-08-18'), ('Sat', '2019-08-17'),
                       ('Fri', '2019-08-16'))
        per_day = []
        for day, date in day_to_date:
            subset = a[a.est_arr_time.str.contains(day)].copy()
            subset['est_arr_time'] = subset['est_arr_time'].str.replace(
                day, date, regex=False)
            subset['dep_time'] = subset['dep_time'].str.replace(
                day, date, regex=False)
            per_day.append(subset)
        ok2 = pd.concat(per_day, axis=0)

        # dep_time can still carry a different weekday than est_arr_time
        # (presumably flights that departed the day before they arrived).
        ok2['dep_time'] = ok2['dep_time'].str.replace(
            'Fri', '2019-08-16', regex=False)
        ok2['dep_time'] = ok2['dep_time'].str.replace(
            'Sat', '2019-08-17', regex=False)

        ok2['dep_time'] = pd.to_datetime(ok2['dep_time'])
        ok2['est_arr_time'] = pd.to_datetime(ok2['est_arr_time'])

        # Flight time in minutes.
        ok2['flight_time'] = (
            ok2['est_arr_time'] - ok2['dep_time']).dt.total_seconds() / 60

        # Airport time zones (departure zones).
        # NOTE(review): the +360 / +60 offsets presumably compensate for
        # timestamps being in the origin's local time, and 561 / 451.5 look
        # like fallback durations for implausibly short results -- confirm.
        cest_pattern = ('MAD|ZRH|BRU|MXP|CDG|DUS|FCO|VIE|FRA|Pisa|BCN|ZAZ|'
                        'WAW|ORY|AMS')
        sa_pattern = "GIG|FOR|COR|EZE|Dois de|BSB|GRU|REC|MVD|BEL|SNU"

        # 1. CEST origins
        cest = ok2[ok2.origin.str.contains(cest_pattern)].copy()
        cest['flight_time'] = cest['flight_time'] + 360
        cest['flight_time'] = cest['flight_time'].apply(
            lambda x: 561 if x < 400 else x)

        # 2. South American origins
        sa = ok2[ok2.origin.str.contains(sa_pattern)].copy()
        sa['flight_time'] = sa['flight_time'] + 60
        sa['flight_time'] = sa['flight_time'].apply(
            lambda x: 451.5 if x < 350 else x)

        # 3. every other origin, left unadjusted
        otro = ok2[~ok2.origin.str.contains(cest_pattern + '|' + sa_pattern)]
        todos = pd.concat([cest, sa, otro], axis=0)

        # Bucket flight times into one-hour bins (0-660 minutes).
        bins = list(range(0, 661, 60))
        todos['flight_bins'] = pd.cut(todos['flight_time'], bins)

        # i. share of flights per bin (not used by the charts below)
        pct_time = todos['flight_bins'].value_counts().rename_axis(
            'flight_bins').reset_index(name='count')
        pct_time['pct'] = pct_time['count'] / todos.shape[0]

        # ii. variance by origin (not used by the charts below)
        vaR = todos.groupby('origin')['flight_time'].var().sort_values()

        # iii. arrivals by part of the day
        t = todos[["origin", "est_arr_time"]].copy()
        t['hours'] = t['est_arr_time'].dt.hour
        t['minutes'] = t['est_arr_time'].dt.minute

        mid_six = t[t.hours.between(0, 6)]
        seven_twelve = t[t.hours.between(7, 12)]
        one_six = t[t.hours.between(13, 18)]
        seven_twelve1 = t[t.hours.between(19, 23)]

        # Share of arrivals in each part of the day.
        total_arrivals = t.shape[0]
        mid_sixP = mid_six.shape[0] / total_arrivals
        seven_twelveP = seven_twelve.shape[0] / total_arrivals
        one_sixP = one_six.shape[0] / total_arrivals
        seven_twelveP1 = seven_twelve1.shape[0] / total_arrivals

        # Origin counts (not used by the charts below).
        ori = t['origin'].value_counts().rename_axis('origin').reset_index(
            name='total')

        # Time between consecutive departures from the same origin.
        tX = todos.sort_values(['origin', 'dep_time'])
        tX['diff_dep'] = tX['dep_time'].diff()
        # The first row of each origin has no predecessor from that origin,
        # so its difference (taken against another origin) is blanked out.
        first_of_origin = tX.origin != tX.origin.shift(1)
        tX.loc[first_of_origin, 'diff_dep'] = pd.NaT
        tX['diff_dep'] = tX['diff_dep'].dt.total_seconds() / 60  # minutes
        tX = tX[~(tX.diff_dep == 0)]

        # Median gap per origin, keeping medians between 1 and 80 minutes.
        takeoffs = tX.groupby('origin')['diff_dep'].median().sort_values()
        takeoffs = takeoffs.to_frame()
        take1 = takeoffs[(takeoffs.diff_dep >= 1) & (takeoffs.diff_dep <= 80)]

        # Arrivals on 2019-08-17 (not used by the charts below).  The index
        # is sorted first because string slicing needs a monotonic index.
        s = t.set_index('est_arr_time').sort_index()
        s = s.loc['2019-08-17 00:00:00':'2019-08-17 23:59:59']

        #VIZ I
        #east coast time vs. cst,pdt, and mdt (comparing flight times)
        west_cent = tX[tX.origin.str.contains(
            'LAX|SFO|LAS|SEA|SAN|SNU|DFW|MEX|MDW|MSY|CMW|MEM|ORD|TUL|MSP|MCI|STL|MID|IAH|VRA|PNS|GDL|MTY|KSAT|BHM|SCU|HOG|TLC|HSV'
        )]
        east = tX[tX.origin.str.contains(
            'NAS|PHI|Toron|Bahama|DCA|HAV|ORF|TPA|LGA|JAX|SAV|SDF|PIE|GGT|PLS|CVG|PIT|CHS|CLE|JFK|CAP|IND|DTW|KEY|CMH|BUF|RDU|SFB|MYEH|MYAM|CYUL|GSP|PBI|RIC|GSO|FMY|BDL|BWI|KTEB|ZSA|KMLB|KAPF|SGJ'
        )]

        # Flights per bin, ordered by bin.  sort_index() replaces the old
        # reset_index / sort_values(by="index") round trip, which depended
        # on a column name that newer pandas versions no longer produce.
        west_data = west_cent['flight_bins'].value_counts().sort_index(
        ).tolist()
        east_data = east['flight_bins'].value_counts().sort_index().tolist()

        # "0-60", "60-120", ..., "600-660": one label per bin.
        factors = [
            "{}-{}".format(lo, hi) for lo, hi in zip(bins[:-1], bins[1:])
        ]
        regions = ['east_time_zone', 'other_time_zone']

        source = ColumnDataSource(data=dict(
            x=factors,
            east_time_zone=east_data,
            other_time_zone=west_data,
        ))

        p = figure(x_range=FactorRange(*factors),
                   plot_height=250,
                   toolbar_location=None,
                   tools="")
        p.vbar_stack(regions,
                     x='x',
                     width=0.9,
                     alpha=0.5,
                     color=["orange", "purple"],
                     source=source,
                     legend=[value(x) for x in regions])

        p.y_range.start = 0
        p.y_range.end = 120
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        p.xaxis.axis_label = 'Flight Time (Minutes)'
        p.yaxis.axis_label = 'Frequency'
        p.legend.location = "top_right"
        p.legend.orientation = "horizontal"
        output_file("mia1.html")
        #show(p)

        #VIZ II (time between departures)
        source1 = ColumnDataSource(take1)
        airports = source1.data['origin'].tolist()
        p1 = figure(x_range=airports)
        p1.vbar_stack(stackers=['diff_dep'],
                      x='origin',
                      source=source1,
                      width=0.5)
        p1.title.text = 'Time Between Flight Departures'
        p1.title.align = "center"
        p1.title.text_color = "orange"
        p1.xaxis.major_label_orientation = math.pi / 4.25
        p1.xaxis.axis_label = ''
        p1.yaxis.axis_label = 'Minutes'
        hover = HoverTool()
        hover.tooltips = [("Time Between Flights", "@diff_dep minutes")]
        hover.mode = 'vline'
        p1.add_tools(hover)
        output_file("mia2.html")
        #show(p1)

        #VIZ III (what time of the day do flights arrive?)
        time_arr = [
            'Midnight to 7 AM', '7 AM to 1 PM', '1 PM to 7 PM',
            '7 PM to Midnight'
        ]
        # One share per label, in the same order.  The second entry used to
        # repeat the evening share instead of the 7 AM - 1 PM share.
        counts = [mid_sixP, seven_twelveP, one_sixP, seven_twelveP1]
        palette = ['lavender', 'plum', 'darkviolet', 'indigo']

        source = ColumnDataSource(data=dict(time_arr=time_arr, counts=counts))

        p = figure(x_range=time_arr,
                   plot_height=250,
                   toolbar_location=None,
                   title="When Do Flights to X Arrive?")
        p.vbar(x='time_arr',
               top='counts',
               width=0.5,
               source=source,
               color="teal",
               line_color='white')

        p.xgrid.grid_line_color = None
        p.y_range.start = 0.0
        p.y_range.end = 0.6
        p.xaxis.axis_label = ""
        p.yaxis.major_label_overrides = {
            0: '0',
            0.1: '10%',
            0.2: '20%',
            0.3: '30%',
            0.4: '40%',
            0.5: '50%'
        }
        p.yaxis.axis_label = "Total Flights"
        p.legend.orientation = "horizontal"
        p.legend.location = "top_center"
        p.title.align = "center"
        output_file("mia3.html")
        #show(p)

        #VIZ IV (outlier flights time plot)
        # Up to ten most frequent origins; isin() also copes with fewer than
        # ten distinct origins, where positional indexing would raise.
        top_origins = tX['origin'].value_counts().index[:10]
        sub_air = tX[tX.origin.isin(top_origins)]

        df = pd.DataFrame(
            dict(flight_time=sub_air['flight_time'], group=sub_air['origin']))
        originS = df['group'].unique().tolist()
        groups = df.groupby('group')
        q1 = groups.quantile(q=0.25)
        q2 = groups.quantile(q=0.50)
        q3 = groups.quantile(q=0.75)
        iqr = q3 - q1
        upper = q3 + 1.5 * iqr
        lower = q1 - 1.5 * iqr

        #find outliers in each group
        def outliers(group):
            cat = group.name
            above = group.flight_time > upper.loc[cat]['flight_time']
            below = group.flight_time < lower.loc[cat]['flight_time']
            return group[above | below]['flight_time']

        out = groups.apply(outliers).dropna()

        #prepare outlier data for plotting
        outx = []
        outy = []
        if not out.empty:
            for keys in out.index:
                outx.append(keys[0])
                outy.append(out.loc[keys[0]].loc[keys[1]])

        p = figure(tools="",
                   background_fill_color="#efefef",
                   x_range=originS,
                   toolbar_location=None)

        #if no outliers, shrink lengths of stems to be no longer than the minimums or maximums
        # The clipped values are written back to the 'flight_time' column;
        # assigning to a non-existent ``.score`` attribute left the stems
        # unclipped.
        qmin = groups.quantile(q=0.00)
        qmax = groups.quantile(q=1.00)
        upper['flight_time'] = [
            min(x, y)
            for (x, y) in zip(qmax['flight_time'], upper['flight_time'])
        ]
        lower['flight_time'] = [
            max(x, y)
            for (x, y) in zip(qmin['flight_time'], lower['flight_time'])
        ]

        # stems
        p.segment(originS,
                  upper.flight_time,
                  originS,
                  q3.flight_time,
                  line_color="black")
        p.segment(originS,
                  lower.flight_time,
                  originS,
                  q1.flight_time,
                  line_color="black")

        # boxes
        p.vbar(originS,
               0.7,
               q2.flight_time,
               q3.flight_time,
               fill_color="aqua",
               line_color="black")
        p.vbar(originS,
               0.7,
               q1.flight_time,
               q2.flight_time,
               fill_color="maroon",
               line_color="black")

        # whiskers (almost-0 height rects simpler than segments)
        p.rect(originS, lower.flight_time, 0.2, 0.01, line_color="black")
        p.rect(originS, upper.flight_time, 0.2, 0.01, line_color="black")

        # outliers
        if not out.empty:
            p.circle(outx, outy, size=6, color="#F38630", fill_alpha=0.6)

        p.xgrid.grid_line_color = None
        p.ygrid.grid_line_color = "white"
        p.grid.grid_line_width = 2
        p.xaxis.major_label_text_font_size = "12pt"
        p.xaxis.major_label_orientation = 3.5 / 2
        p.xaxis.axis_label = ''
        p.yaxis.axis_label = 'Flight Time (minutes)'
        p.title.text = 'Flights That Are Shorter or Longer Than Average'
        p.title.align = "center"
        output_file('mia4x.html')
        #show(p)

        #VIZ V
        # NOTE(review): no output_file() is set for this chart, so show()
        # below presumably writes it to 'mia4x.html' -- confirm intended.
        dep = tX['diff_dep'].tolist()
        flight_minutes = tX['flight_time'].tolist()
        airports = tX['origin'].tolist()

        source = ColumnDataSource(
            data=dict(dep=dep, time=flight_minutes, airports=airports))
        p = figure(title="Flight Time Vs. Time Between Departures",
                   x_range=Range1d(0, 1000))
        p.scatter(x="dep", y="time", size=4, source=source)
        p.xaxis[0].axis_label = "Time Between Flights (Minutes)"
        p.yaxis[0].axis_label = "Flight Time (Minutes)"

        labels = LabelSet(x='dep',
                          y='time',
                          text='airports',
                          level='glyph',
                          x_offset=5,
                          y_offset=5,
                          source=source,
                          render_mode='canvas')

        p.add_layout(labels)
        show(p)
예제 #13
0
def plot_fundingOvertime(df,
                         col1,
                         col2,
                         col_transform=1000000000,
                         left=2015,
                         right=2016.5):
    """Plot the sum of ``col2`` per ``col1`` as an interactive Bokeh line chart.

    Args:
        df: DataFrame containing the columns ``col1`` and ``col2``.
        col1: Column to group by; used for the x-axis.
        col2: Column summed within each group; used for the y-axis after
            division by ``col_transform``.
        col_transform: Divisor applied to the summed values
            (default 1,000,000,000).
        left: Left edge of the highlighted box, in x-axis units.
        right: Right edge of the highlighted box, in x-axis units.

    Returns:
        None.  The figure is exported as PNG and SVG, bound to an HTML
        output file (all under the module-level ``img_path``) and shown.
    """

    print(
        '\n*** INTERACTIVE MODE: HOVER OVER THE GRAPH TO SEE AWARD TOTALS FOR EACH YEAR***'
    )

    # one row per col1 value holding the summed col2
    grouped = df.groupby([col1])[col2].sum().reset_index()

    # scale the amounts (billions of dollars with the default divisor)
    grouped[col2] = grouped[col2] / col_transform
    source = ColumnDataSource(grouped)

    # initialize the figure
    p = figure(
        plot_width=1000,
        plot_height=450,
        title=
        'Award funding has increased over time with 2011 seeing the largest funding amounts'
    )

    # create the plot
    p.line(x=col1, y=col2, line_width=6, source=source, color='green')

    # set formatting parameters
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    p.background_fill_color = "AliceBlue"
    p.title.text_font_size = "16pt"
    p.title.text_color = 'MidnightBlue'
    p.xaxis.axis_label_text_font_size = '15pt'
    p.yaxis.axis_label_text_font_size = '15pt'
    p.yaxis.axis_label = 'Amount awarded in US Billion'
    p.xaxis.major_label_text_font_size = '12pt'

    # add shaded box spanning [left, right] on the x-axis
    box = BoxAnnotation(left=left,
                        right=right,
                        line_width=1,
                        line_color='black',
                        line_dash='dashed',
                        fill_alpha=0.2,
                        fill_color='green')
    p.add_layout(box)

    # create label for the box
    # NOTE(review): position and text are fixed and do not follow the
    # data or the left/right arguments -- confirm this is intended.
    label = Label(x=2016,
                  y=6.220,
                  x_offset=12,
                  text="$6.22 b.awarded in 2016",
                  text_baseline="middle")
    p.add_layout(label)

    # Hover tool showing the plotted value.  The field is taken from col2
    # (it used to be fixed to "@AwardAmount"); the @{...} form also works
    # for column names that contain spaces.
    hover = HoverTool()
    hover.tooltips = [("Total amount awarded ", "@{%s}" % col2)]

    hover.mode = 'vline'
    p.add_tools(hover)

    # export plots
    _ = export_png(p, filename=img_path / 'fundingovertime.png')
    output_file(img_path / 'fundingovertime.html')

    # the SVG export needs the svg backend
    p.output_backend = "svg"
    export_svgs(p, filename=img_path / "fundingovertime.svg")

    #display plot
    show(p)
예제 #14
0
def plot_wordsOverTime(df,
                       col,
                       column_line=None,
                       operation='count',
                       title='Words over time'):
    """Plot an aggregate of ``df`` per ``col`` as an interactive Bokeh line chart.

    Args:
        df: DataFrame to aggregate.
        col: Column to group by; used for the x-axis.
        column_line: Column of the aggregated frame drawn on the y-axis.
        operation: Aggregation passed to ``DataFrame.groupby(...).agg``.
        title: Figure title.

    Returns:
        None.  The figure is exported as PNG and SVG, bound to an HTML
        output file (all under the module-level ``img_path``) and shown.
    """

    print(
        '\n*** INTERACTIVE MODE: HOVER OVER THE GRAPH TO SEE COUNTS FOR EACH YEAR***'
    )

    # aggregate every remaining column per value of `col`
    counts = df.groupby([col]).agg(operation)
    counts.reset_index(inplace=True)

    # create a column data source to plot in bokeh
    source = ColumnDataSource(counts)

    # initialize the plot
    p = figure(plot_width=1000, plot_height=450, title=title)

    # plot the trend line
    p.line(x=col, y=column_line, line_width=6, source=source)

    # set parameters
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    p.background_fill_color = "AliceBlue"
    p.title.text_font_size = "16pt"
    p.title.text_color = 'MidnightBlue'
    p.xaxis.axis_label_text_font_size = '15pt'
    p.yaxis.axis_label_text_font_size = '15pt'

    p.yaxis.axis_label = 'Total number of words'
    p.xaxis.major_label_text_font_size = '12pt'

    # create annotation
    box = BoxAnnotation(left=2014.5,
                        right=2016.5,
                        line_width=1,
                        line_color='black',
                        line_dash='dashed',
                        fill_alpha=0.2,
                        fill_color='orange')

    # add annotation to plot
    p.add_layout(box)

    # Hover tool.  The fields follow the plotted columns instead of the
    # fixed "@word_count" / "@year"; 'word_count' remains the fallback when
    # column_line is not given.  @{...} also handles names with spaces.
    value_field = column_line if column_line is not None else 'word_count'
    hover = HoverTool()
    hover.tooltips = [("Total number of words ", "@{%s}" % value_field),
                      ('year', "@{%s}" % col)]

    hover.mode = 'vline'
    p.add_tools(hover)

    # export plots
    _ = export_png(p, filename=img_path / 'wordsovertime.png')
    output_file(img_path / 'wordsovertime.html')

    # the SVG export needs the svg backend
    p.output_backend = "svg"
    export_svgs(p, filename=img_path / "wordsovertime.svg")

    #display plot
    show(p)
예제 #15
0
def plot_awardsPerDivision(df,
                           title,
                           NSF_org,
                           aggregate=list(),
                           sortby=tuple(),
                           n_directs=5):
    """Plot per-group award statistics for the top groups as coloured circles.

    Args:
        df: DataFrame to aggregate.
        title: Figure title.
        NSF_org: Name of the column to group by (shown on the x-axis).
        aggregate: Aggregation spec passed to ``groupby(...).agg``.  The
            result must have two-level columns, because only the second
            level is kept; the hover tool and the y-axis read the columns
            'count', 'mean' and 'max'.
        sortby: Key passed to ``sort_values`` on the aggregated frame
            (descending order).
        n_directs: Number of top rows to keep.

    Returns:
        None.  The figure is exported as PNG and SVG, bound to an HTML
        output file (all under the module-level ``img_path``) and shown;
        the aggregated table is printed afterwards.
    """

    # aggregate per group, sort descending and keep the top n_directs rows
    directorates = (df.groupby([NSF_org])
                    .agg(aggregate)
                    .sort_values(sortby, ascending=False)
                    .head(n_directs))

    # keep only the second column level (the aggregation names)
    directorates.columns = directorates.columns.get_level_values(1)

    directorates.reset_index(inplace=True)

    # set datasource to visualize in Bokeh
    source = ColumnDataSource(directorates)
    direct = source.data[NSF_org].tolist()

    # initialize the plot
    p = figure(x_range=direct,
               plot_width=1000,
               plot_height=550,
               tools='box_select, wheel_zoom, reset, save')

    # set color for each category (the palette holds five colours)
    color_mapper = CategoricalColorMapper(factors=direct,
                                          palette=[
                                              'MidnightBlue', 'DodgerBlue',
                                              'CornflowerBlue', 'DeepSkyBlue',
                                              'grey'
                                          ])

    # create plot
    p.circle(x=NSF_org,
             y='count',
             source=source,
             color=dict(transform=color_mapper, field=NSF_org),
             nonselection_alpha=0.2,
             size=35,
             legend=NSF_org)

    # set plot formatting and displaying options
    p.title.text = title
    p.title.text_font_size = "15pt"
    p.title.text_color = 'MidnightBlue'
    p.xaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_size = '14pt'
    p.xaxis.axis_label = 'Directorate'
    p.yaxis.axis_label = 'Number of projects funded'
    p.xaxis.major_tick_line_color = None
    p.xaxis.minor_tick_line_color = None
    p.xaxis.major_label_text_font_size = '0pt'  # turn off x-axis tick labels

    # Hover tool.  The directorate field is the column named by the NSF_org
    # argument (it used to be the literal name "NSF_org"); the @{...} form
    # also handles column names that contain spaces.
    hover = HoverTool()
    hover.tooltips = [("Directorate ", " @{%s}" % NSF_org),
                      ("Counts ", "@count"),
                      ("Average funding", "@mean"),
                      ("Max funding awarded ", "@max")]

    hover.mode = 'vline'
    p.add_tools(hover)

    # export plots
    _ = export_png(p, filename=img_path / 'awardsbydir.png')
    output_file(img_path / 'awardsbydir.html')

    # the SVG export needs the svg backend
    p.output_backend = "svg"
    export_svgs(p, filename=img_path / "awardsbydir.svg")

    #display plot
    show(p)
    print(directorates)