예제 #1
0
def figure3d():
    """Plot the cost per JR5 download for each provider as a bar graph.

    Reads in cost data from a supplementary file for each provider.
    Cost per JR5 download = total cost per provider / # of JR5 downloads by provider
    Represented as "big 7" providers, including Elsevier Freedom and Elsevier Subscribed titles.

    Only providers that have both a JR5 download total and a matching
    'Package' row in the cost file are plotted; each bar is labelled with
    the provider it was actually computed for.

    Chart Type: Bar Graph
    Y-Axis: Cost (in dollars) per JR5 Download
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        1figr_U_Virginia_edit_Supp_Data, Total cost for 2017
    X-Axis: Provider Name
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        1figr_U_Virginia_edit_Supp_Data, Total cost for 2017
    """

    data = pd.read_excel(filename,
                         sheet_name='Journals per Provider',
                         skiprows=8)

    big7 = [
        'Elsevier', 'Sage', 'Springer', 'Taylor & Francis', 'Wiley',
        'Elsevier Freedom', 'Elsevier Subscribed'
    ]

    # (provider name, JR5 download total) pairs
    stats_by_provider = []

    for provider_name in big7:

        subset_by_provider = data.loc[data['Provider'] == provider_name]

        journals_data = subset_by_provider.groupby(
            'Journal', as_index=False).sum().values.tolist()

        # The provider-level summary row carries the provider's own name in
        # the 'Journal' column.
        # NOTE(review): position 5 is taken to be the JR5 total; this depends
        # on the sheet's column layout - confirm against the workbook.
        for row in journals_data:
            if row[0] == provider_name:
                stats_by_provider.append((row[0], row[5]))

    elsevier_freedom_collection = rf.make_freedom_collection_provider()
    elsevier_freedom_jr5_downloads = elsevier_freedom_collection[
        'Downloads JR5 2017 in 2017'].sum()
    stats_by_provider.append(
        ('Elsevier Freedom', elsevier_freedom_jr5_downloads))

    elsevier_subscribed_titles = rf.make_elsevier_subscribed_titles_provider()
    elsevier_subscribed_jr5_downloads = elsevier_subscribed_titles[
        'Downloads JR5 2017 in 2017'].sum()
    stats_by_provider.append(
        ('Elsevier Subscribed', elsevier_subscribed_jr5_downloads))

    # reads cost data per provider from the following supplementary file
    cost_data = pd.read_excel('1figr_U_Virginia_edit_Supp_Data.xlsx')

    # Each row is [package name, summed cost, ...].
    # NOTE(review): assumes the first summed column is the total cost - confirm.
    cost_per_provider = cost_data.groupby(
        ['Package'], as_index=False).sum().values.tolist()

    # Collect the label together with the value so the bars can never be
    # paired with the wrong provider name when a provider has no cost row.
    plot_labels = []
    cost_per_jr5_download = []

    for name, jr5_downloads in stats_by_provider:
        for cost in cost_per_provider:
            if name == cost[0]:  # first element in each item is the name
                plot_labels.append(name)
                cost_per_jr5_download.append(cost[1] / jr5_downloads)

    # make plot
    mpl.rcParams['ytick.major.width'] = 1
    mpl.rcParams['xtick.major.width'] = 1
    plt.figure(num=None, figsize=(8, 8))
    plt.suptitle('Cost per Download, current year 2017 downloads (JR5)')
    plot = plt.bar(plot_labels, cost_per_jr5_download, width=.8, color='green')
    plt.ylabel('Cost (dollars)')
    plt.ylim(0, 37)  # changes top and bottom limit of y axis in plot
    plt.xticks(rotation=90)

    # Write the dollar value just above each bar.
    for bar in plot:
        score = bar.get_height()

        plt.text(bar.get_x() + bar.get_width() / 2,
                 1.05 * score,
                 '${:,.2f}'.format(score),
                 ha='center',
                 va='bottom')
예제 #2
0
def _titles_to_reach_share(sorted_counts, total_references, share):
    """Count the leading titles taken before the tally reaches share * total.

    sorted_counts must be ordered from most to least referenced. A title is
    counted while the running tally is still below the threshold, so the
    title that crosses the threshold is included.
    """
    threshold = total_references * share
    running_tally = 0
    titles = 0
    for count in sorted_counts:
        if not running_tally < threshold:
            break
        running_tally += count
        titles += 1
    return titles


def _jr_stats_for_provider(name, journal_rows):
    """Build the JR80/JR90/JR95 statistics tuple for one provider.

    journal_rows is a list of per-journal rows whose element 6 holds the
    reference count. Returns
    (name, jr80_score, jr90_score, jr95_score, total_score,
     jr80_titles, jr90_titles, jr95_titles)
    where each score is that band's share of all titles (bands are
    incremental: jr90_score excludes the JR80 titles, and so on) and
    total_score is the share of titles outside JR95.

    Raises ZeroDivisionError when journal_rows is empty.
    """
    total_references = 0
    for row in journal_rows:
        total_references += row[6]
    total_journals = len(journal_rows)

    sorted_counts = sorted((row[6] for row in journal_rows), reverse=True)

    jr80_titles = _titles_to_reach_share(sorted_counts, total_references, 0.8)
    jr90_titles = _titles_to_reach_share(sorted_counts, total_references, 0.9)
    jr95_titles = _titles_to_reach_share(sorted_counts, total_references, 0.95)

    jr80_score = jr80_titles / total_journals
    jr90_score = jr90_titles / total_journals - jr80_score
    jr95_score = jr95_titles / total_journals - (jr80_score + jr90_score)
    total_score = 1 - (jr80_score + jr90_score + jr95_score)

    return (name, jr80_score, jr90_score, jr95_score, total_score,
            jr80_titles, jr90_titles, jr95_titles)


def figure1g():
    """Makes JR80, JR90, JR95 graph for Sage, Springer, Taylor & Francis and Wiley,
    plus Elsevier split into Elsevier Unmatched, Elsevier Subscribed and Elsevier Freedom.

    Plots References (Articles published by your institution, referenced by other authors)

    JR80 = Journals which make up 80% of References
    JR90 = Journals which make up 90% of References
    JR95 = Journals which make up 95% of References

    Chart Type: Stacked Bar Graph
    Y-Axis: Percent of Total Titles
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Provider Name
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """

    data = pd.read_excel(filename,
                         sheet_name='Journals per Provider',
                         skiprows=8)

    stats_by_provider = []

    providers = ['Sage', 'Springer', 'Taylor & Francis', 'Wiley']

    # make stats for existing providers
    for provider_name in providers:

        subset_by_provider = data.loc[data['Provider'] == provider_name]
        journals_data = subset_by_provider.groupby(
            'Journal', as_index=False).sum().values.tolist()

        # Drop the aggregator row (its 'Journal' value is the provider name).
        # Built as a new list: removing items from a list while iterating
        # over it skips the element that follows each removal.
        journals_data = [
            row for row in journals_data if row[0] != provider_name
        ]

        stats_by_provider.append(
            _jr_stats_for_provider(provider_name, journals_data))

    # Elsevier subsets come from helper frames; pair each with its display
    # name directly instead of reading the name back out of a data row.
    elsevier_providers = [
        ('Elsevier Unmatched', rf.make_elsevier_unmatched_provider()),
        ('Elsevier Subscribed', rf.make_elsevier_subscribed_titles_provider()),
        ('Elsevier Freedom', rf.make_freedom_collection_provider()),
    ]

    for name, provider_frame in elsevier_providers:

        journals_data = provider_frame.groupby(
            'Journal', as_index=False).sum().values.tolist()

        stats_by_provider.append(_jr_stats_for_provider(name, journals_data))

    # make plot
    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle('Percentage of Titles Referenced by Provider (References)')
    plt.ylabel('Percent of total titles')
    plt.gca().yaxis.set_major_formatter(
        StrMethodFormatter('{x:,.0%}'))  # formats y axis as %

    jr80s = mpatches.Patch(color='violet', label='JR80 titles')
    jr90s = mpatches.Patch(color='moccasin', label='JR90 titles')
    jr95s = mpatches.Patch(color='paleturquoise', label='JR95 titles')
    others = mpatches.Patch(color='silver', label='Total titles')

    plt.legend(handles=[jr80s, jr90s, jr95s, others],
               bbox_to_anchor=(1, 1))  # moves legend outside plot
    plt.xticks(rotation=45)

    # TODO: add value labels to the bars
    for stats in stats_by_provider:

        provider = stats[0]
        jr80 = stats[1]
        jr90 = stats[2]
        jr95 = stats[3]
        total_values = stats[4]

        # Stack the four bands on top of each other for this provider.
        plt.bar(provider, jr80, color='violet')
        plt.bar(provider, jr90, bottom=jr80, color='moccasin')
        plt.bar(provider,
                jr95,
                bottom=(jr80 + jr90),
                color='paleturquoise')
        plt.bar(provider,
                total_values,
                bottom=(jr80 + jr90 + jr95),
                color='silver')
예제 #3
0
def figure2b():
    """A measurement of currency. Compares JR5 downloads to JR1 downloads for each of the big 7 providers.
    JR5 downloads are 2017 articles downloaded in 2017.
    JR1 downloads are all years articles downloaded in 2017.
    We want to see what % of current articles people are downloading.
    Adds the 'Elsevier Freedom Collection', 'Elsevier Subscribed Titles' and
    'Elsevier Unmatched' subsets to Sage, Springer, Taylor & Francis and Wiley,
    making it the big 7 providers.

    Each bar is labelled with the provider its ratio was computed for, so a
    provider missing from the dataset cannot shift the labels.

    Chart Type: Bar Graph
    Y-Axis: Percent of Total
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Provider Names
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019

    """

    data = pd.read_excel(filename, sheet_name='Journals per Provider', skiprows=8)

    # Providers whose totals are read from the dataset's summary rows.
    dataset_providers = ['Sage', 'Springer', 'Taylor & Francis', 'Wiley']

    # Elsevier subsets are computed from helper frames; looking them up in
    # the dataset as well would add a second value for the same label.
    elsevier_subsets = [
        ('Elsevier Freedom Collection', rf.make_freedom_collection_provider),
        ('Elsevier Subscribed Titles', rf.make_elsevier_subscribed_titles_provider),
        ('Elsevier Unmatched', rf.make_elsevier_unmatched_provider),
    ]

    plot_labels = []
    percent_jr5_of_jr1 = []

    for provider_name in dataset_providers:

        subset_by_provider = data.loc[data['Provider'] == provider_name]

        journals_data = subset_by_provider.groupby('Journal', as_index=False).sum().values.tolist()

        # The provider-level summary row carries the provider's own name in
        # the 'Journal' column.
        # NOTE(review): positions 4 and 5 are taken to be the JR1 and JR5
        # totals; this depends on the sheet's column layout - confirm.
        for row in journals_data:
            if row[0] == provider_name:
                jr1_total = row[4]
                jr5_total = row[5]
                plot_labels.append(provider_name)
                percent_jr5_of_jr1.append(jr5_total / jr1_total)

    # make ratio of jr5 to jr1 downloads for each elsevier subset
    for label, make_subset in elsevier_subsets:
        subset = make_subset()

        jr5_downloads = subset['Downloads JR5 2017 in 2017'].sum()
        jr1_downloads = subset['Downloads JR1 2017'].sum()

        plot_labels.append(label)
        percent_jr5_of_jr1.append(jr5_downloads / jr1_downloads)

    mpl.rcParams['ytick.major.width'] = 1
    mpl.rcParams['xtick.major.width'] = 1
    plt.figure(num=None, figsize=(8, 8))
    plt.suptitle('Percent JR5 downloads of JR1 downloads (for 2017)')
    plot = plt.bar(plot_labels, percent_jr5_of_jr1, width=.8, color='green')
    plt.ylabel('Percent of Total')
    plt.ylim(0, 1)  # changes top and bottom limit of y axis in plot
    plt.xticks(rotation=90)

    plt.gca().yaxis.set_major_formatter(StrMethodFormatter('{x:,.0%}'))  # formats y axis as %

    # Write the percentage just above each bar.
    for bar in plot:
        score = bar.get_height()

        plt.text(bar.get_x() + bar.get_width() / 2,
                 1.05 * score,
                 '{:.1%}'.format(score),
                 ha='center',
                 va='bottom')
예제 #4
0
def figure4b_references():
    """Plot yearly reference counts (2008-2017) per provider as a line graph.

    References are those made by your institution's affiliated authors.
    Elsevier is shown as three separate lines (Freedom, Subscribed and
    Unmatched) rather than as a single provider.

    Chart Type: Line Graph
    Y-Axis: Number of References
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, References to journal/provider by your institution's authors (as measured in Scopus)
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """

    years = [str(year) for year in range(2008, 2018)]
    # Reference counts live in the columns named '<year>.1'.
    reference_columns = [f'{year}.1' for year in years]

    original_1figr_dataset = pd.read_excel(filename,
                                           sheet_name='Journals per Provider',
                                           skiprows=8)

    elsevier_frames = (
        rf.make_freedom_collection_provider(),
        rf.make_elsevier_subscribed_titles_provider(),
        rf.make_elsevier_unmatched_provider(),
    )

    # One list of ten yearly totals per provider, in plotting order.
    ref_by_provider = []

    # Elsevier subsets: add up every row of the frame for each year.
    for frame in elsevier_frames:
        ref_by_provider.append(
            [sum(frame[column].tolist()) for column in reference_columns])

    # Other providers: take the value from the first row of the provider's
    # subset for each year.
    for provider_name in ('Sage', 'Springer', 'Taylor & Francis', 'Wiley'):
        is_provider = original_1figr_dataset['Provider'] == provider_name
        provider_rows = original_1figr_dataset.loc[is_provider]
        ref_by_provider.append(
            [provider_rows[column].tolist()[0] for column in reference_columns])

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle(
        f'Number of References Made by {your_institution} Researchers by Provider'
    )
    plt.xlabel('Year')
    plt.ylabel('Number References')
    plt.ylim(0, 12000)

    # Line appearance, listed in the same order as ref_by_provider.
    line_styles = [
        {'label': 'Elsevier Freedom', 'color': 'red', 'linestyle': 'dashed'},
        {'label': 'Elsevier Subscribed', 'color': 'red'},
        {'label': 'Elsevier Unmatched', 'color': 'black'},
        {'label': 'Sage', 'color': 'blue'},
        {'label': 'Springer', 'color': 'green'},
        {'label': 'Taylor & Francis', 'color': 'purple'},
        {'label': 'Wiley', 'color': 'orange'},
    ]
    for yearly_totals, style in zip(ref_by_provider, line_styles):
        plt.plot(years, yearly_totals, **style)

    plt.legend()
예제 #5
0
def figure4b_percentage():
    """Plot each provider's yearly share of all references (2008-2017) as a line graph.

    For every year, a provider's reference count is divided by the total
    taken over every provider listed in the dataset. Elsevier is shown as
    three separate lines (Freedom, Subscribed and Unmatched) rather than as
    a single provider.

    Chart Type: Line Graph
    Y-Axis: Percentage of All References
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, References to journal/provider by your institution's authors (as measured in Scopus)
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """

    years = [str(year) for year in range(2008, 2018)]
    # Reference counts live in the columns named '<year>.1'.
    reference_columns = [f'{year}.1' for year in years]

    original_1figr_dataset = pd.read_excel(filename,
                                           sheet_name='Journals per Provider',
                                           skiprows=8)

    # every distinct value of the Provider column
    all_providers = original_1figr_dataset['Provider'].unique()

    # Yearly totals over all providers, using the first row of each
    # provider's subset.
    yearly_totals = dict.fromkeys(reference_columns, 0)
    for provider_name in all_providers:
        is_provider = original_1figr_dataset['Provider'] == provider_name
        provider_rows = original_1figr_dataset.loc[is_provider]
        for column in reference_columns:
            yearly_totals[column] += provider_rows[column].tolist()[0]

    # One list of ten yearly shares per provider, in plotting order.
    ref_by_provider = []

    # Non-Elsevier providers: first-row value divided by the yearly total.
    for provider_name in ('Sage', 'Springer', 'Taylor & Francis', 'Wiley'):
        is_provider = original_1figr_dataset['Provider'] == provider_name
        provider_rows = original_1figr_dataset.loc[is_provider]
        ref_by_provider.append([
            provider_rows[column].tolist()[0] / yearly_totals[column]
            for column in reference_columns
        ])

    # Elsevier subsets: sum of all rows divided by the yearly total.
    elsevier_frames = (
        rf.make_freedom_collection_provider(),
        rf.make_elsevier_subscribed_titles_provider(),
        rf.make_elsevier_unmatched_provider(),
    )
    for frame in elsevier_frames:
        ref_by_provider.append([
            sum(frame[column].tolist()) / yearly_totals[column]
            for column in reference_columns
        ])

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle(
        f'Percent of All References Made by {your_institution} Authors')
    plt.xlabel('Year')
    plt.ylabel('Percentage')

    # formats y axis as %
    plt.gca().yaxis.set_major_formatter(StrMethodFormatter('{x:,.0%}'))

    # Line appearance, listed in the same order as ref_by_provider.
    line_styles = [
        {'label': 'Sage', 'color': 'blue'},
        {'label': 'Springer', 'color': 'green'},
        {'label': 'Taylor & Francis', 'color': 'purple'},
        {'label': 'Wiley', 'color': 'orange'},
        {'label': 'Elsevier Freedom', 'color': 'red', 'linestyle': 'dashed'},
        {'label': 'Elsevier Subscribed', 'color': 'red'},
        {'label': 'Elsevier Unmatched', 'color': 'black'},
    ]
    for yearly_shares, style in zip(ref_by_provider, line_styles):
        plt.plot(years, yearly_shares, **style)

    plt.legend(loc='center left', bbox_to_anchor=(1, 0.8))
예제 #6
0
def figure6b_percent_oa_articles():
    """Plot the yearly share of papers published open access (2008-2017) per provider.

    Shows open access papers as a percentage of all papers published per
    provider per year. Elsevier is shown as three separate lines (Freedom,
    Subscribed and Unmatched) rather than as a single provider.

    For Sage, Springer, Taylor & Francis and Wiley the value is read directly
    from the '<year>.3' column of the provider's first row. For the Elsevier
    subsets it is computed as sum('<year>.2') / sum('<year>.4'), i.e. # of OA
    papers divided by total papers.

    Chart Type: Line Graph
    Y-Axis: Percentage of Papers Published Open Access
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, OA papers in 1findr per journal/provider (intersection with Scopus)
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019

    """

    years = [
        '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016',
        '2017'
    ]

    original_1figr_dataset = pd.read_excel(filename,
                                           sheet_name='Journals per Provider',
                                           skiprows=8)

    # holds percent papers published open access per year per provider
    percent_oa_papers_by_provider = []

    # populate % oa papers for the providers read straight from the dataset
    providers = ['Sage', 'Springer', 'Taylor & Francis', 'Wiley']

    for provider_name in providers:

        subset_by_provider = original_1figr_dataset.loc[
            original_1figr_dataset['Provider'] == provider_name]

        # NOTE(review): '<year>.3' is taken to already hold the OA
        # percentage for the provider's summary (first) row - confirm.
        percent_oa_papers_by_provider.append(
            [subset_by_provider[f'{year}.3'].tolist()[0] for year in years])

    # populate % oa papers for the elsevier subsets
    # this is # of OA papers divided by total papers
    elsevier_providers = [
        rf.make_freedom_collection_provider(),
        rf.make_elsevier_subscribed_titles_provider(),
        rf.make_elsevier_unmatched_provider(),
    ]

    for provider_frame in elsevier_providers:

        percent_oa_papers_by_year = []

        # Deriving both column names from the same year keeps numerator and
        # denominator in step (2011 previously read the 2010 OA column).
        for year in years:
            oa_papers = provider_frame[f'{year}.2'].tolist()
            total_papers = provider_frame[f'{year}.4'].tolist()
            percent_oa_papers_by_year.append(
                sum(oa_papers) / sum(total_papers))

        percent_oa_papers_by_provider.append(percent_oa_papers_by_year)

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle('Percent of all Open Access by provider')
    plt.xlabel('Year')
    plt.ylabel('Percentage')

    plt.gca().yaxis.set_major_formatter(
        StrMethodFormatter('{x:,.0%}'))  # formats y axis as %

    # Line appearance, listed in the same order as
    # percent_oa_papers_by_provider.
    line_styles = [
        {'label': 'Sage', 'color': 'blue'},
        {'label': 'Springer', 'color': 'green'},
        {'label': 'Taylor & Francis', 'color': 'purple'},
        {'label': 'Wiley', 'color': 'orange'},
        {'label': 'Elsevier Freedom', 'color': 'red', 'linestyle': 'dashed'},
        {'label': 'Elsevier Subscribed', 'color': 'red'},
        {'label': 'Elsevier Unmatched', 'color': 'black'},
    ]
    for yearly_percentages, style in zip(percent_oa_papers_by_provider,
                                         line_styles):
        plt.plot(years, yearly_percentages, **style)

    plt.legend(loc='center left', bbox_to_anchor=(1, 0.8))
예제 #7
0
def figure6b_oa_available_articles():
    """Plot the number of Open Access (OA) available articles per provider for 2008-2017.

    Elsevier is drawn as three separate lines (Freedom collection, Subscribed
    titles and Unmatched titles) instead of as one provider.

    Chart Type: Line Graph
    Y-Axis: Number of Open Access articles (the '<year>.2' columns)
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, OA papers in 1findr per journal/provider (intersection with Scopus)
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """

    sheet = pd.read_excel(filename,
                          sheet_name='Journals per Provider',
                          skiprows=8)

    freedom = rf.make_freedom_collection_provider()
    subscribed = rf.make_elsevier_subscribed_titles_provider()
    unmatched = rf.make_elsevier_unmatched_provider()

    year_range = range(2008, 2018)
    oa_columns = [f'{year}.2' for year in year_range]

    # Elsevier subsets: each yearly value is the column total over every row
    # of the subset.
    series_per_provider = [[
        sum(frame[column].tolist()) for column in oa_columns
    ] for frame in (freedom, subscribed, unmatched)]

    # Remaining providers: each yearly value is taken from the first row whose
    # 'Provider' cell equals the provider name.
    for publisher in ('Sage', 'Springer', 'Taylor & Francis', 'Wiley'):
        rows = sheet.loc[sheet['Provider'] == publisher]
        series_per_provider.append(
            [rows[column].tolist()[0] for column in oa_columns])

    years = [str(year) for year in year_range]

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle('Number of OA-Available Articles')
    plt.xlabel('Year')
    plt.ylabel('Number of Articles')

    # One entry per line, in the same order as series_per_provider.
    line_styles = [
        {'label': 'Elsevier Freedom', 'color': 'red', 'linestyle': 'dashed'},
        {'label': 'Elsevier Subscribed', 'color': 'red'},
        {'label': 'Elsevier Unmatched', 'color': 'black'},
        {'label': 'Sage', 'color': 'blue'},
        {'label': 'Springer', 'color': 'green'},
        {'label': 'Taylor & Francis', 'color': 'purple'},
        {'label': 'Wiley', 'color': 'orange'},
    ]
    for values, style in zip(series_per_provider, line_styles):
        plt.plot(years, values, **style)

    plt.legend(loc='center left', bbox_to_anchor=(1, 0.8))
예제 #8
0
def figure7e():
    """Plot 2017 JR1 downloads per Domain for the Elsevier Subscribed and Elsevier Freedom titles.

    Each domain gets two dots: one for the Subscribed titles and one for the
    Freedom collection. Domains are ordered by the combined number of
    downloads of both collections, ascending.

    Chart Type: Dot Plot/Scatter Plot
    Y-Axis: Domain
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Domain
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Number of JR1 Downloads (sum of the 'Downloads JR1 2017' column)
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """

    original_1figr_data = pd.read_excel(filename,
                                        sheet_name='Journals per Provider',
                                        skiprows=8)
    subscribed_titles_provider = rf.make_elsevier_subscribed_titles_provider()
    freedom_collection_provider = rf.make_freedom_collection_provider()

    # Rows without a domain show up as a float NaN among the unique values.
    # Build a filtered list rather than calling remove() on the list being
    # iterated, which makes the loop skip the element after each removal.
    unique_domains = sorted(
        domain for domain in original_1figr_data['Domain'].unique().tolist()
        if not isinstance(domain, float))

    #builds list of tuples containing (Domain name, # of subscribed titles JR1 downloads, # of Freedom titles downloads)
    stats_by_domain = []
    for domain in unique_domains:

        subscribed_subset_by_domain = subscribed_titles_provider.loc[
            subscribed_titles_provider['Domain'] == domain]
        subscribed_jr1_downloads = subscribed_subset_by_domain[
            'Downloads JR1 2017'].sum()

        freedom_subset_by_domain = freedom_collection_provider.loc[
            freedom_collection_provider['Domain'] == domain]
        freedom_jr1_downloads = freedom_subset_by_domain[
            'Downloads JR1 2017'].sum()

        stats_by_domain.append(
            (domain, subscribed_jr1_downloads, freedom_jr1_downloads))

    #sorts domains by sum of total # of jr1 downloads
    arrangement = sorted(stats_by_domain, key=lambda row: row[1] + row[2])

    #splitting elements of tuples in arrangement into lists so it is easier to plot
    domains_list = [row[0] for row in arrangement]
    subscribed_total = [row[1] for row in arrangement]
    freedom_total = [row[2] for row in arrangement]

    #make plot
    # NOTE: these assignments change the shared rcParams mapping, so they
    # remain in effect after this function returns.
    mpl.rcParams['ytick.major.width'] = 1
    mpl.rcParams['xtick.major.width'] = 1
    plt.figure(num=None, figsize=(8, 8))
    plt.suptitle('JR1 downloads by Domain')

    plt.scatter(subscribed_total, domains_list, color='blue')
    plt.scatter(freedom_total, domains_list, color='orange')

    # The legend is built from explicit colour patches instead of the scatter
    # artists.
    subscribed_legend_label = mpatches.Patch(color='blue',
                                             label='Elsevier Subscribed')
    freedom_legend_label = mpatches.Patch(color='orange',
                                          label='Elsevier Freedom')
    plt.xlabel('Number of JR1 Downloads')
    plt.legend(loc='lower right',
               handles=[subscribed_legend_label, freedom_legend_label])
예제 #9
0
def figure5b_percentage():
    """Plot, per provider and year (2008-2017), institution-authored papers as a share of all papers.

    For every year the value is the sum of the integer-named year column
    (papers by your institution's authors) divided by the sum of the
    '<year>.4' column (the 'total papers in Scopus' columns). Elsevier is
    drawn as three separate lines: Freedom, Subscribed and Unmatched titles.

    Chart Type: Line Graph
    Y-Axis: Percentage of papers
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Total papers in Scopus per journal/provider
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """

    sheet = pd.read_excel(filename,
                          sheet_name='Journals per Provider',
                          skiprows=8)

    year_range = range(2008, 2018)

    def yearly_share(frame):
        # One ratio per year: summed papers over summed Scopus totals, taken
        # over every row of the given frame.
        return [
            sum(frame[year].tolist()) / sum(frame[f'{year}.4'].tolist())
            for year in year_range
        ]

    #non-Elsevier providers: every row whose 'Provider' cell matches the name
    share_per_provider = [
        yearly_share(sheet.loc[sheet['Provider'] == publisher])
        for publisher in ('Sage', 'Springer', 'Taylor & Francis', 'Wiley')
    ]

    #Elsevier subsets, appended after the other providers
    elsevier_frames = (
        rf.make_freedom_collection_provider(),
        rf.make_elsevier_subscribed_titles_provider(),
        rf.make_elsevier_unmatched_provider(),
    )
    for frame in elsevier_frames:
        share_per_provider.append(yearly_share(frame))

    years = [str(year) for year in year_range]

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle(
        f'Percent of All Articles published by {your_institution} Authors')
    plt.xlabel('Year')
    plt.ylabel('Percentage')

    #formats y axis as %
    percent_formatter = StrMethodFormatter('{x:,.2%}')
    plt.gca().yaxis.set_major_formatter(percent_formatter)

    # One entry per line, in the same order as share_per_provider.
    line_styles = [
        {'label': 'Sage', 'color': 'blue'},
        {'label': 'Springer', 'color': 'green'},
        {'label': 'Taylor & Francis', 'color': 'purple'},
        {'label': 'Wiley', 'color': 'orange'},
        {'label': 'Elsevier Freedom', 'color': 'red', 'linestyle': 'dashed'},
        {'label': 'Elsevier Subscribed', 'color': 'red'},
        {'label': 'Elsevier Unmatched', 'color': 'black'},
    ]
    for values, style in zip(share_per_provider, line_styles):
        plt.plot(years, values, **style)

    plt.legend(loc='center left', bbox_to_anchor=(1, 0.8))
예제 #10
0
def figure5b_papers():
    """Plot papers per year (2008-2017) by your institution's affiliated authors, per provider.

    Elsevier is drawn as three separate lines (Freedom collection, Subscribed
    titles and Unmatched titles) instead of as one provider. Paper counts are
    read from the integer-named year columns (2008 ... 2017).

    Chart Type: Line Graph
    Y-Axis: Number of Papers
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Total papers in Scopus per journal/provider
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """

    sheet = pd.read_excel(filename,
                          sheet_name='Journals per Provider',
                          skiprows=8)

    freedom = rf.make_freedom_collection_provider()
    subscribed = rf.make_elsevier_subscribed_titles_provider()
    unmatched = rf.make_elsevier_unmatched_provider()

    year_range = range(2008, 2018)

    #Elsevier subsets: each yearly value is the column total over every row
    counts_per_provider = [[
        sum(frame[year].tolist()) for year in year_range
    ] for frame in (freedom, subscribed, unmatched)]

    #other providers: each yearly value comes from the first row whose
    #'Provider' cell equals the provider name
    for publisher in ('Sage', 'Springer', 'Taylor & Francis', 'Wiley'):
        rows = sheet.loc[sheet['Provider'] == publisher]
        counts_per_provider.append(
            [rows[year].tolist()[0] for year in year_range])

    years = [str(year) for year in year_range]

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle(f'Number of Articles (papers) by {your_institution} Authors')
    plt.xlabel('Year')
    plt.ylabel('Number of Articles')

    # One entry per line, in the same order as counts_per_provider.
    line_styles = [
        {'label': 'Elsevier Freedom', 'color': 'red', 'linestyle': 'dashed'},
        {'label': 'Elsevier Subscribed', 'color': 'red'},
        {'label': 'Elsevier Unmatched Titles', 'color': 'black'},
        {'label': 'Sage', 'color': 'blue'},
        {'label': 'Springer', 'color': 'green'},
        {'label': 'Taylor & Francis', 'color': 'purple'},
        {'label': 'Wiley', 'color': 'orange'},
    ]
    for values, style in zip(counts_per_provider, line_styles):
        plt.plot(years, values, **style)

    plt.legend(loc='center left', bbox_to_anchor=(1, 0.8))