def annotate(chromos, pos, anno):
    """Annotate genomic locations.

    Tests if sites specified by `chromos` and `pos` are annotated by `anno`.
    The result is aligned with the input: element `i` of the output refers to
    the site `(chromos[i], pos[i])`, regardless of the order in which
    chromosomes appear in `chromos`.

    Parameters
    ----------
    chromos: :class:`numpy.ndarray`
        :class:`numpy.ndarray` with chromosome of sites.
    pos: :class:`numpy.ndarray`
        :class:`numpy.ndarray` with position on chromosome of sites.
    anno: :class:`pandas.DataFrame`
        :class:`pandas.DataFrame` with columns `chromo`, `start`, `end` that
        specify annotated regions.

    Returns
    -------
    :class:`numpy.ndarray`
        Boolean :class:`numpy.ndarray` of same length as `chromos` indicating
        if positions are annotated. Empty if `chromos` is empty.
    """
    chromos = np.asarray(chromos)
    pos = np.asarray(pos)
    # Preallocate so that each chromosome's result is written back to the
    # positions it came from. Concatenating per-chromosome results instead
    # would order the output by `np.unique` (sorted), not by input order.
    annotated = np.zeros(len(chromos), dtype=bool)
    for chromo in np.unique(chromos):
        site_mask = chromos == chromo
        chromo_anno = anno.loc[anno.chromo == chromo]
        # NOTE(review): assumes `is_in` returns one boolean per queried
        # position -- confirm against its definition.
        annotated[site_mask] = is_in(pos[site_mask],
                                     chromo_anno['start'].values,
                                     chromo_anno['end'].values)
    return annotated
Beispiel #2
0
def annotate(chromos, pos, anno):
    """Annotate genomic locations.

    Tests if sites specified by `chromos` and `pos` are annotated by `anno`.
    Output element `i` always describes the site `(chromos[i], pos[i])`, even
    if the sites are not grouped or sorted by chromosome.

    Parameters
    ----------
    chromos: :class:`numpy.ndarray`
        :class:`numpy.ndarray` with chromosome of sites.
    pos: :class:`numpy.ndarray`
        :class:`numpy.ndarray` with position on chromosome of sites.
    anno: :class:`pandas.DataFrame`
        :class:`pandas.DataFrame` with columns `chromo`, `start`, `end` that
        specify annotated regions.

    Returns
    -------
    :class:`numpy.ndarray`
        Boolean :class:`numpy.ndarray` of same length as `chromos` indicating
        if positions are annotated. Empty if `chromos` is empty.
    """
    chromos = np.asarray(chromos)
    pos = np.asarray(pos)
    # Fill a preallocated array through a mask instead of stacking the
    # per-chromosome results: stacking yields them in sorted-chromosome order
    # (the order of `np.unique`), which need not match the input order, and
    # fails outright when there are no sites at all.
    idx = np.zeros(len(chromos), dtype=bool)
    for chromo in np.unique(chromos):
        on_chromo = chromos == chromo
        chromo_anno = anno.loc[anno.chromo == chromo]
        # NOTE(review): `is_in` is expected to return one boolean per
        # position passed in -- verify against its definition.
        idx[on_chromo] = is_in(pos[on_chromo],
                               chromo_anno['start'].values,
                               chromo_anno['end'].values)
    return idx
Beispiel #3
0
def test_is_in():
    """Check `annos.is_in` on a small set of regions.

    The expected values treat each region as a closed interval: positions
    equal to a region's start or end count as inside, positions between two
    regions or before the first one do not.
    """
    region_starts = [2, 4, 12, 17]
    region_ends = [2, 8, 15, 18]
    queries = [-1, 2, 2, 3, 4, 8, 15, 16]
    expected = [False, True, True, False, True, True, True, False]

    npt.assert_array_equal(
        annos.is_in(queries, region_starts, region_ends),
        expected)
Beispiel #4
0
def test_is_in():
    """Check `annos.is_in` against hand-computed membership flags.

    Regions are given as parallel start/end lists; the cases cover a position
    before all regions, positions on region boundaries (expected inside), and
    positions in the gaps between regions (expected outside).
    """
    starts = [2, 4, 12, 17]
    ends = [2, 8, 15, 18]

    # (query position, expected membership)
    cases = [
        (-1, False),
        (2, True),
        (2, True),
        (3, False),
        (4, True),
        (8, True),
        (15, True),
        (16, False),
    ]
    positions = [position for position, _ in cases]
    expect = [inside for _, inside in cases]

    result = annos.is_in(positions, starts, ends)
    npt.assert_array_equal(result, expect)
Beispiel #5
0
def annotate(anno_file, chromo, pos):
    """Annotate positions on one chromosome with regions read from a file.

    Reads the first three columns of `anno_file` as `chromo`, `start`, `end`,
    keeps the regions on `chromo`, and tests which entries of `pos` fall into
    any of them.

    Parameters
    ----------
    anno_file: str
        Path of the annotation table. It is opened via `dat.GzipFile`
        (presumably handling gzip-compressed files -- confirm against that
        helper) and must have no header row.
    chromo: str
        Chromosome to select. Chromosome names from the file are upper-cased
        and stripped of `CHR` before comparison, so `chromo` must be given in
        that normalized form.
    pos: :class:`numpy.ndarray`
        Positions on `chromo` to be tested.

    Returns
    -------
    :class:`numpy.ndarray`
        `int8` array with one entry per position, as returned by `an.is_in`
        (presumably 1 if the position lies in a region, 0 otherwise).
    """
    handle = dat.GzipFile(anno_file, 'r')
    try:
        anno = pd.read_table(handle, header=None, usecols=[0, 1, 2],
                             dtype={0: 'str', 1: 'int32', 2: 'int32'})
    finally:
        # Close the handle even if parsing fails.
        handle.close()
    anno.columns = ['chromo', 'start', 'end']
    anno.chromo = anno.chromo.str.upper().str.replace('CHR', '')
    # Sort into a new frame: sorting the `.loc` selection in place can raise
    # pandas' SettingWithCopyWarning.
    anno = anno.loc[anno.chromo == chromo].sort_values('start')
    # NOTE(review): `join_overlapping` is assumed to merge overlapping
    # regions and to expect them sorted by start -- confirm.
    start, end = an.join_overlapping(anno.start.values, anno.end.values)
    return np.array(an.is_in(pos, start, end), dtype='int8')
Beispiel #6
0
def annotate(anno_file, chromo, pos):
    """Test which positions of a chromosome lie in regions listed in a file.

    The first three columns of `anno_file` are read as `chromo`, `start` and
    `end`. Only regions on `chromo` are used.

    Parameters
    ----------
    anno_file: str
        Path of the headerless annotation table, opened through
        `dat.GzipFile` (NOTE(review): presumably transparent gzip support --
        verify).
    chromo: str
        Chromosome of interest, in the normalized form used internally:
        upper case and without the `CHR` prefix.
    pos: :class:`numpy.ndarray`
        Positions on `chromo` to be tested.

    Returns
    -------
    :class:`numpy.ndarray`
        Array of dtype `int8` built from the result of `an.is_in`, one entry
        per element of `pos`.
    """
    table = dat.GzipFile(anno_file, 'r')
    try:
        regions = pd.read_table(table, header=None, usecols=[0, 1, 2],
                                dtype={0: 'str', 1: 'int32', 2: 'int32'})
    finally:
        # Release the file handle whether or not parsing succeeded.
        table.close()
    regions.columns = ['chromo', 'start', 'end']
    regions.chromo = regions.chromo.str.upper().str.replace('CHR', '')
    # Select and sort without `inplace=True`: mutating the filtered selection
    # in place can trigger pandas' SettingWithCopyWarning.
    regions = regions.loc[regions.chromo == chromo].sort_values('start')
    # NOTE(review): assumes `join_overlapping` merges overlapping regions
    # given start-sorted input -- confirm against its definition.
    start, end = an.join_overlapping(regions.start.values,
                                     regions.end.values)
    return np.array(an.is_in(pos, start, end), dtype='int8')