Esempio n. 1
0
def make_country_share_tables(truncate=False):
    """Build the final ``country_share_`` tables.

    Each ``country_share_`` table gives the proportion of articles whose authors are affiliated (exclusively) with
    the EU, the US, or China, versus any other country or countries, restricted by citation percentile.

    :param truncate: If True, overwrite tables that exist.
    :return: None
    """
    arxiv_variant = {
        'sql_path': 'country_share_arxiv_coverage_template',
        'table_suffix': '_arxiv',
    }
    for percentile in CITATION_PERCENTILES:
        # Default template first, then the variant driven by the arXiv-coverage template
        make_ntile_table(percentile, truncate)
        make_ntile_table(percentile, truncate, **arxiv_variant)

    # One table at the 100th percentile from the min-percentile template
    make_ntile_table(100, truncate, sql_path='country_share_min_percentile_template', table_suffix='_arxiv_min')

    # One table at the 100th percentile per data source, each from its own template
    for source in ('ds', 'mag', 'wos'):
        make_ntile_table(100, truncate, sql_path=f'country_share_template_{source}', table_suffix=f'_{source}')

    make_table('country_shares', truncate)
Esempio n. 2
0
def main():
    """Rebuild the ``competition`` table, then run every summary over it.

    The table is created with ``clobber=True``; the summaries then run in a fixed order.

    :return: None
    """
    table_name = "competition"
    make_table(table_name, clobber=True)

    # Order matches the sequence in which the summaries are reported
    summaries = (
        summarize_by_year,
        summarize_percent_ai_by_year,
        summarize_by_source,
        summarize_by_person_mention,
        summarize_by_author,
        summarize_by_organization_mention,
    )
    for summarize in summaries:
        summarize(table_name)
Esempio n. 3
0
def make_comparison_table(truncate=False):
    """Build the publication ``comparison`` table.

    For every publication in the analysis, the ``comparison`` table holds the result from each alternative
    method/model.

    :param truncate: If True, overwrite table that exists.
    :return: None
    """
    table_name = 'comparison'
    make_table(table_name, truncate)
Esempio n. 4
0
def make_citation_count_tables(truncate=False) -> None:
    """Build the citation count tables: one per data source, then the combined one.

    :param truncate: If True, overwrite tables that exist.
    :return: None.
    """
    # 'all_citation_counts' is built last, after the per-source tables
    table_names = (
        'wos_citation_counts',
        'ds_citation_counts',
        'mag_citation_counts',
        'all_citation_counts',
    )
    for table_name in table_names:
        make_table(table_name, truncate)
Esempio n. 5
0
def make_overlap_tables(truncate=False):
    """Build the final overlap (``summary``) tables.

    The tables describe how positive/negative predictions overlap across methods/models; they feed the Venn diagrams
    in the analysis.

    :param truncate: If True, overwrite tables that exist.
    :return: None
    """
    table_names = (
        'summary',
        # Same as 'summary', but for publications in the top percentile - see comments in SQL for implementation notes
        'summary_1pct',
        'summary_arxiv_1pct',
        'summary_arxiv_1pct_min',
    )
    for table_name in table_names:
        make_table(table_name, truncate)
Esempio n. 6
0
def make_arxiv_tables(truncate=False):
    """Build the arXiv subject coverage tables.

    "arXiv subject coverage" means whether arXiv includes any papers about a subject defined by a dataset. Coverage is
    computed for each data source on its own (WOS, DS, and MAG), and the results are then combined in the
    ``all_arxiv_categories`` table.

    :param truncate: If True, overwrite tables that exist.
    :return: None
    """
    # Per-source tables first; the combined table comes last
    table_names = (
        'wos_arxiv_categories',
        'ds_arxiv_categories',
        'mag_arxiv_categories',
        'all_arxiv_categories',
    )
    for table_name in table_names:
        make_table(table_name, truncate)
Esempio n. 7
0
def make_country_tables(truncate=False):
    """Build the tables of author affiliation countries.

    Countries are derived for each data source on its own (DS, WOS, and MAG), and the results are then combined in the
    ``all_countries`` table.

    :param truncate: If True, overwrite tables that exist.
    :return: None
    """
    # Upload the manual country labels for DS. The upload always replaces the codings table
    # (if_exists='replace'), independent of ``truncate``.
    manual_codes = pd.read_csv('data/input/ds-country-codings.csv')
    manual_codes.to_gbq(f'{DATASET}.ds_country_codings', if_exists='replace', project_id=client.project)

    # Per-source tables first; the combined table comes last
    for table_name in ('ds_countries', 'wos_countries', 'mag_countries', 'all_countries'):
        make_table(table_name, truncate)
Esempio n. 8
0
def make_result_tables(truncate=False):
    """Build the tables holding results from the alternative methods/models.

    Three methods are covered: CSET keywords, Elsevier's keyword-classifier hybrid, and CSET's SciBERT models. There
    are four SciBERT models: any-subject / all AI, CV, NLP, and robotics.

    For each SciBERT model, two results are reported: with and without the requirement that only papers with a
    subject we consider covered by arXiv (see ``make_arxiv_tables``) can be predicted relevant.

    :param truncate: If True, overwrite tables that exist.
    :return: None
    """
    table_names = (
        'keyword_results',
        'elsevier_results',
        'scibert_results',
        'category_results',
        'mag_ai',
        'all_predictions',
    )
    for table_name in table_names:
        make_table(table_name, truncate)
Esempio n. 9
0
def make_all(truncate=False) -> None:
    """Write query results to tables, running every build step in a fixed order.

    :param truncate: If True, overwrite tables that exist.
    """
    for table_name in ('en_2010_2020', 'cset_ids', 'wide_ids'):
        make_table(table_name, truncate)

    make_citation_count_tables(truncate)
    make_country_tables(truncate)
    make_arxiv_tables(truncate)
    make_table('all_years', truncate)
    make_result_tables(truncate)

    # NOTE(review): 'category_results' is also built inside make_result_tables; presumably it is rebuilt here once
    # 'all_categories' exists - confirm against the SQL before removing either call.
    later_tables = (
        'ds_percentiles',
        'mag_percentiles',
        'wos_percentiles',
        'percentiles',
        'min_percentiles',
        'all_categories',
        'category_results',
    )
    for table_name in later_tables:
        make_table(table_name, truncate)

    make_comparison_table(truncate)
    make_country_share_tables(truncate)
    make_overlap_tables(truncate)
    make_ancillary_tables(truncate)

    for table_name in ('mag_ai_fields_of_study', 'mag_ai_fields_overlap'):
        make_table(table_name, truncate)
Esempio n. 10
0
def make_ancillary_tables(truncate=False):
    """Build ancillary tables that address questions about the data or its validity.

    :param truncate: If True, overwrite tables that exist.
    :return: None.
    """
    table_names = (
        # What is each dataset's coverage of arXiv?
        'dataset_arxiv_coverage',
        # Which/how many publications do we observe only in MAG? For affiliation country availability (see analysis
        # folder)
        'mag_only',
        # Which papers in MAG have AI-plausible subject categories?
        # NOTE(review): unclear which of the tables below answers this question - confirm against the SQL.
        'dataset_overlap',
        'dataset_overlap_by_prediction',
        'citations_by_dataset',
        'mag_subfield_scores',
        'mag_replication',
    )
    for table_name in table_names:
        make_table(table_name, truncate)