Example #1
0
def arxiv_articles(chunk=None):
    '''arxiv_articles
    Get ArXiv article data, either every chunk lazily or one chunk directly.

    Parameters
    ----------
    chunk : int, optional
        Specify a particular chunk of data to download. There are 17
        chunks numbered 0 through 16. When omitted (``None``), all chunks
        are iterated in order.

    Returns
    -------
    generator
        When ``chunk`` is ``None``: a generator that loads and returns
        ArXiv articles one chunk at a time (chunks of 100,000 articles
        according to the original documentation).

    or

    DataFrame
        When ``chunk`` is given: the result of ``load_df_pkl`` for that
        single chunk.
    '''
    # NOTE: this function must not contain a ``yield`` statement. A single
    # ``yield`` anywhere in the body turns the whole function into a
    # generator function, in which case the single-chunk ``return`` would
    # only set StopIteration.value and callers would never receive the data.
    n_chunks = 17  # chunks are numbered 0 through 16 inclusive

    if chunk is None:
        # Generator expression keeps the loading lazy: each chunk is only
        # fetched when the caller advances the iterator.
        return (
            load_df_pkl(bucket, f'{folder}/arxiv_articles_{i:02}.pkl.bz2')
            for i in range(n_chunks)
        )

    key = f'{folder}/arxiv_articles_{chunk:02}.pkl.bz2'
    return load_df_pkl(bucket, key)
Example #2
0
def patents_10k():
    '''patents_10k
    Load the pre-selected sample of 10,000 patents from ONS.

    Returns
    -------
    object
        Whatever ``load_df_pkl`` returns for the sample object
        (presumably a DataFrame, going by the helper's name -- confirm).
    '''
    # Both the bucket and the object key are fixed for this sample.
    sample_location = (
        'innovation-mapping-tutorials',
        'ons/ONS_y02_sample_10000.pkl.bz2',
    )
    return load_df_pkl(*sample_location)
Example #3
0
def cordis_table(table):
    '''cordis_table
    Load one table of the CORDIS database.

    Parameters
    ----------
    table : str
        Name of the table to load; it is interpolated into the object key
        as ``cordis_<table>.pkl.bz2``. Tables available include:
            - organisations
            - project_organisations
            - project_proposal_calls
            - project_topics
            - projects
            - proposal_calls
            - publications
            - reports
            - topics

    Returns
    -------
    DataFrame
        A dataframe containing the CORDIS table data, as produced by
        ``load_df_pkl``.
    '''
    # ``folder`` and ``bucket`` come from module scope.
    return load_df_pkl(bucket, f'{folder}/cordis_{table}.pkl.bz2')
Example #4
0
def mag_table(table='fields_of_study'):
    '''mag_table
    Load a table from the Microsoft Academic Graph database.

    Parameters
    ----------
    table : str
        Name of the Microsoft Academic Graph table to load; defaults to
        ``'fields_of_study'``. Tables available include:
            - fields_of_study

    Returns
    -------
    DataFrame
        A dataframe containing the Microsoft Academic Graph table data,
        as produced by ``load_df_pkl``.
    '''
    # Object key follows the pattern <folder>/mag_<table>.pkl.bz2.
    mag_key = f'{folder}/mag_{table}.pkl.bz2'
    table_data = load_df_pkl(bucket, mag_key)
    return table_data
Example #5
0
def grid_table(table):
    '''grid_table
    Load a table from the GRID database.

    Parameters
    ----------
    table : str
        Name of the GRID table to load; it is interpolated into the object
        key as ``grid_<table>.pkl.bz2``. Tables available include:
            - aliases
            - institutes

    Returns
    -------
    DataFrame
        A dataframe containing the GRID table data, as produced by
        ``load_df_pkl``.
    '''
    # ``folder`` and ``bucket`` come from module scope.
    return load_df_pkl(bucket, f'{folder}/grid_{table}.pkl.bz2')
Example #6
0
def arxiv_table(table):
    '''arxiv_table
    Load one of the ArXiv tables.

    Parameters
    ----------
    table : str
        Name of the ArXiv table to load; it is interpolated into the object
        key as ``arxiv_<table>.pkl.bz2``. Tables available include:
            - article_categories
            - article_corex_topics
            - article_fields_of_study
            - article_institutes
            - categories
            - corex_topics

    Returns
    -------
    DataFrame
        A dataframe containing the ArXiv table data, as produced by
        ``load_df_pkl``.
    '''
    # Build the object key first, then delegate the download/unpickle.
    arxiv_key = f'{folder}/arxiv_{table}.pkl.bz2'
    table_data = load_df_pkl(bucket, arxiv_key)
    return table_data
Example #7
0
def gtr_link_table(table):
    '''gtr_link_table
    Load a link table from the Gateway to Research (GtR) database.
    Link tables link project ids to other entities within GtR.

    Parameters
    ----------
    table : str
        Name of the link table to load; it is interpolated into the object
        key as ``link_tables/gtr_<table>_link.pkl.bz2``. Tables available
        include:
            - funds
            - organisations
            - organisations_locations
            - outcomes_artisticandcreativeproducts
            - outcomes_collaborations
            - outcomes_disseminations
            - outcomes_furtherfundings
            - outcomes_impactsummaries
            - outcomes_intellectualproperties
            - outcomes_keyfindings
            - outcomes_policyinfluences
            - outcomes_products
            - outcomes_publications
            - outcomes_researchdatabaseandmodels
            - outcomes_researchmaterials
            - outcomes_softwareandtechnicalproducts
            - outcomes_spinouts
            - participant
            - persons
            - topic

    Returns
    -------
    DataFrame
        A dataframe containing the GtR link table data, as produced by
        ``load_df_pkl``.
    '''
    # Link tables live in their own ``link_tables`` sub-folder.
    return load_df_pkl(bucket, f'{folder}/link_tables/gtr_{table}_link.pkl.bz2')
Example #8
0
def patents_10k():
    '''patents_10k
    Load the pre-selected sample of 10,000 patents from ONS.

    Returns
    -------
    object
        Whatever ``load_df_pkl`` returns for the sample object
        (presumably a DataFrame, going by the helper's name -- confirm).
    '''
    # The key is fixed; ``bucket`` is taken from module scope.
    return load_df_pkl(bucket, 'ons/ONS_y02_sample_10000.pkl.bz2')