Esempio n. 1
0
def _get_unionization_from_experiment(experiment_id,
                                      structures=None,
                                      attributes=None,
                                      average=True,
                                      verbose=False):
    """
    Gets unionization data for provided experiment(s) `experiment_id`

    Parameters
    ----------
    experiment_id : int or list
        Numerical experiment ID. If multiple experiments are provided the
        requested `attributes` will be averaged across experiments (see
        `average`)
    structures : list, optional
        List of structures (id, acronym, or name) for which to get unionization
        information associated with provided `experiment_id`. If not specified
        uses structures documented in [1]_. Specifying either the id or name is
        recommended as acronyms are not unique to structures. Default: None
    attributes : str or list, optional
        Which attributes / information to obtain for the provided structure.
        See :func:`abagen.mouse.available_unionization_info` for list of
        available attributes to request. If not specified then only
        'expression_density' will be returned. Specifying 'all' will return
        all available attributes. Default: None
    average : bool, optional
        Whether to average across experiments if `experiment_id` is provided as
        a list. Only experiments probing the same gene will be considered for
        averaging. Default: True
    verbose : bool, optional
        Whether to print status messages. Default: False

    Returns
    -------
    unionization : pandas.DataFrame
        Where columns are the requested unionization attributes. If
        `average=True` the index levels are (gene_id, structure_id); otherwise
        they are (gene_id, experiment_id, structure_id)

    References
    ----------
    .. [1] Rubinov, M., Ypma, R. J., Watson, C., & Bullmore, E. T. (2015).
       Wiring cost and topological participation of the mouse brain connectome.
       Proceedings of the National Academy of Sciences, 112(32), 10032-10037.
    """

    if isinstance(experiment_id, (str, int)):
        experiment_id = [experiment_id]

    if structures is None:
        # read default structure list (from Rubinov et al., 2015, PNAS)
        structures = fetch_rubinov2015_structures(entry_type='id')
    elif isinstance(structures, (str, int)):
        structures = [structures]

    # we need to coerce all provided structures to be integer ids, NOT strings
    # so fetch all available structures then recode them to ids
    if any(isinstance(f, str) for f in structures):
        structs = np.asarray(fetch_allenref_structures(verbose=False))
        structs = Recoder(structs.tolist(), fields=['acronym', 'id', 'name'])
        structures = list(set(structs.id.get(f) for f in structures))

    # determine which attributes to request; if we don't have to request all
    # of them then we can speed up the API call
    if attributes is None:
        attributes = ['expression_density']
    elif attributes == 'all':
        attributes = _UNIONIZATION_ATTRIBUTES
    elif isinstance(attributes, str):
        attributes = [attributes]
    # a real list is needed below so that column selection yields a DataFrame
    columns = list(attributes)

    includes = ['structure_unionizes', 'genes']
    criteria = [
        '[id$in{}]'.format(','.join([str(f) for f in experiment_id])),
        'products[id$eq1]',
        'structure_unionizes[structure_id$in{}]'.format(','.join(
            [str(f) for f in structures])),
    ]
    req_attributes = [
        'id', 'structure_unionizes', 'structure_unionizes.structure_id'
    ] + ['structure_unionizes.' + attr for attr in columns]

    info = _make_api_query('SectionDataSet',
                           includes=includes,
                           criteria=criteria,
                           attributes=req_attributes,
                           verbose=verbose)

    # flatten to one record per (experiment, structure), tagging each record
    # with the gene / experiment it came from; copies are made so the API
    # response is not modified while we iterate over it
    records = [
        dict(struc, gene_id=exp['genes'][0]['id'], experiment_id=exp['id'])
        for exp in info
        for struc in exp['structure_unionizes']
    ]

    # construct data frame from requested unionization info
    info = pd.DataFrame(records)
    if average:
        # select the requested columns BEFORE averaging: this avoids taking
        # the mean of bookkeeping columns (e.g., experiment_id) and avoids
        # errors that newer pandas versions raise when asked to average
        # non-numeric columns that were never requested
        return info.groupby(['gene_id', 'structure_id'])[columns].mean()

    info = info.set_index(['gene_id', 'experiment_id', 'structure_id'])
    return info[columns]
Esempio n. 2
0
def get_unionization_from_gene(id=None,
                               acronym=None,
                               name=None,
                               slicing_direction='sagittal',
                               structures=None,
                               attributes=None,
                               average=True,
                               verbose=False):
    """
    Fetches structure unionization data for the specified gene(s)

    The gene(s) must be identified via one of `id`, `acronym`, or `name`.

    Parameters
    ----------
    id : int, optional
        Numerical gene ID
    acronym : str, optional
        Short-form gene acronym (case sensitive)
    name : str, optional
        Full gene name (case sensitive)
    slicing_direction : {'sagittal', 'coronal'}, optional
        Slicing direction of brain tissue
    structures : list, optional
        Structures (id, acronym, or name) for which unionization information
        should be returned. When omitted, the structures documented in [1]_
        are used. Ids or names are preferred over acronyms because acronyms
        are not unique to structures. Default: None
    attributes : str or list, optional
        Unionization attributes to return. See
        :func:`abagen.mouse.available_unionization_info` for the attributes
        that can be requested. When omitted only 'expression_density' is
        returned; 'all' requests every attribute. Default: None
    average : bool, optional
        Whether to average over experiments when more than one experiment
        corresponds to a provided gene. Averaging is only done across
        experiments probing the same gene, and distinct structures are kept
        separate. Default: True
    verbose : bool, optional
        Whether to print status messages. Default: False

    Returns
    -------
    unionization : pandas.DataFrame
        Columns are the requested unionization attributes; the index levels
        are gene id and structure id. If `average=False`, experiment id is an
        additional index level

    Raises
    ------
    ValueError
        If `slicing_direction` is not one of 'sagittal' or 'coronal'

    Examples
    --------
    >>> from abagen import mouse
    >>> mouse.get_unionization_from_gene(acronym='Pdyn', structures=[22, 31])
                          expression_density
    gene_id structure_id
    18376   22                      0.024840
            31                      0.017199

    >>> mouse.get_unionization_from_gene(acronym=['Ace', 'Cd99'], structures=[22, 31])
                          expression_density
    gene_id structure_id
    11210   22                      0.001283
            31                      0.001427
    163028  22                      0.067537
            31                      0.056442

    References
    ----------
    .. [1] Rubinov, M., Ypma, R. J., Watson, C., & Bullmore, E. T. (2015).
       Wiring cost and topological participation of the mouse brain connectome.
       Proceedings of the National Academy of Sciences, 112(32), 10032-10037.
    """  # noqa

    # reject unsupported slicing directions before doing any other work
    directions = ['sagittal', 'coronal']
    if slicing_direction not in directions:
        message = 'Slicing_direction {} is invalid. Must be in {}.'
        raise ValueError(message.format(slicing_direction, directions))

    # normalize `structures` into a list, falling back to the default
    # structure list (from Rubinov et al., 2015, PNAS) when none was given
    if structures is None:
        structures = fetch_rubinov2015_structures(entry_type='id')
    elif isinstance(structures, (str, int)):
        structures = [structures]

    # string entries (acronyms / names) have to be recoded to ids, which
    # requires the full listing of available structures
    needs_recoding = any(isinstance(entry, str) for entry in structures)
    if needs_recoding:
        reference = np.asarray(fetch_allenref_structures(verbose=False))
        lookup = Recoder(reference.tolist(), fields=['acronym', 'id', 'name'])
        structures = list({lookup.id.get(entry) for entry in structures})

    # find the experiments associated with the requested gene(s) ...
    experiments = _get_experiments_from_gene(
        id=id,
        acronym=acronym,
        name=name,
        slicing_direction=slicing_direction,
        verbose=verbose,
    )

    # ... and pull the unionization data for those experiments
    return _get_unionization_from_experiment(
        experiments,
        structures=structures,
        attributes=attributes,
        average=average,
        verbose=verbose,
    )