Esempi in Python per remap_blast, esempi in Python per blast.remap_blast

Esempio n. 1

0

Mostra file

def blast_to_df_iter(fn, delimiter='\t', chunksize=10000, remap=True):
    '''Stream an NCBI BLAST+ `-outfmt6` results file as DataFrame chunks.

    The file is read lazily, `chunksize` rows at a time, with the column
    names taken from `blast_cols`. Each chunk is handed to
    `convert_dtypes` together with the mapping built from `blast_cols`.

    Per the original author's notes, BLAST+ reports coordinates with
    start >= 1 and swaps the start/end order for hits on the negative
    strand; with `remap` enabled, `remap_blast` is applied to each chunk
    to convert to 0-based, half-open intervals.

    Args:
        fn (str): The results file.
        delimiter (str): Field separator used in the file.
        chunksize (int): Hits per iteration.
        remap (bool): If true, pass each chunk through `remap_blast`
            before yielding it.
    Yields:
        DataFrame: Pandas DataFrame with the BLAST+ hits.
    '''

    column_names = [name for name, _ in blast_cols]
    reader = pd.read_table(fn,
                           header=None,
                           skipinitialspace=True,
                           names=column_names,
                           delimiter=delimiter,
                           chunksize=chunksize)

    for chunk in reader:
        # Return values are ignored: both helpers are expected to modify
        # the chunk in place.
        convert_dtypes(chunk, dict(blast_cols))
        if remap:
            remap_blast(chunk)
        yield chunk

Esempio n. 2

0

Mostra file

def crb_to_df_iter(fn, chunksize=10000, remap=False):
    '''Stream a crb-blast (CRBB, version 0.6.6) results file as
    DataFrame chunks.

    The tab-separated file is read lazily, `chunksize` rows at a time,
    with column names taken from `crb_cols`. The `qrange` and `srange`
    columns, written as `start..end`, are split into the integer columns
    `qstart`/`qend` and `sstart`/`send`, and the original range columns
    are dropped.

    Args:
        fn (str): The results file.
        chunksize (int): Hits per iteration.
        remap (bool): If true, pass each chunk through `remap_blast`
            before yielding it.
    Yields:
        DataFrame: Pandas DataFrame with the CRBB hits.
    '''

    # (range column, new start column, new end column); processed in this
    # order so the new columns are appended in the same sequence as before.
    range_specs = (('qrange', 'qstart', 'qend'),
                   ('srange', 'sstart', 'send'))

    column_names = [name for name, _ in crb_cols]
    reader = pd.read_table(fn,
                           header=None,
                           names=column_names,
                           delimiter='\t',
                           chunksize=chunksize)

    for chunk in reader:
        convert_dtypes(chunk, dict(crb_cols))

        for range_col, start_col, end_col in range_specs:
            # partition() yields three columns: before, separator, after.
            pieces = chunk[range_col].str.partition('..')
            chunk[start_col] = pieces[0].astype(int)
            chunk[end_col] = pieces[2].astype(int)
            del chunk[range_col]

        if remap:
            remap_blast(chunk)
        yield chunk

Esempio n. 3

0

Mostra file

File: parsers.py Progetto: macmanes-lab/dammit

def crb_to_df_iter(fn, chunksize=10000, remap=False):
    '''Yield DataFrames of up to `chunksize` hits parsed from a
    crb-blast (CRBB, version 0.6.6) results file.

    Columns are named after `crb_cols` and each chunk is passed to
    `convert_dtypes` with the mapping built from `crb_cols`. The
    `start..end` strings in `qrange` and `srange` are then replaced by
    the integer columns `qstart`, `qend`, `sstart` and `send`.

    Args:
        fn (str): The results file.
        chunksize (int): Hits per iteration.
        remap (bool): If true, pass each chunk through `remap_blast`
            before yielding it.
    Yields:
        DataFrame: Pandas DataFrame with the CRBB hits.
    '''

    names = [col for col, _ in crb_cols]
    chunks = pd.read_table(fn, header=None, names=names,
                           delimiter='\t', chunksize=chunksize)

    for df in chunks:
        convert_dtypes(df, dict(crb_cols))

        # Query range first, then subject range, so the appended columns
        # keep the order qstart, qend, sstart, send.
        for source, lo_name, hi_name in (('qrange', 'qstart', 'qend'),
                                         ('srange', 'sstart', 'send')):
            split = df[source].str.partition('..')
            df[lo_name] = split[0].astype(int)
            df[hi_name] = split[2].astype(int)
            del df[source]

        if remap:
            remap_blast(df)
        yield df

Esempio n. 4

0

Mostra file

File: parsers.py Progetto: camillescott/dammit

def crb_to_df_iter(fn, chunksize=10000, remap=True):
    '''Yield DataFrames of up to `chunksize` hits parsed from a
    crb-blast (CRBB, version 0.6.6) results file.

    crb-blast uses BLAST+ under the hood, so its output receives the
    same treatment: with `remap` enabled, `remap_blast` is applied to
    each chunk to convert to 0-based, half-open intervals.

    Before remapping, the `start..end` strings in `qrange` and `srange`
    are replaced by the integer columns `qstart`, `qend`, `sstart` and
    `send`.

    Args:
        fn (str): The results file.
        chunksize (int): Hits per iteration.
        remap (bool): If true, pass each chunk through `remap_blast`
            before yielding it.
    Yields:
        DataFrame: Pandas DataFrame with the CRBB hits.
    '''

    names = [col for col, _ in crb_cols]
    chunks = pd.read_table(fn, header=None, names=names,
                           delimiter='\t', chunksize=chunksize)

    for df in chunks:
        convert_dtypes(df, dict(crb_cols))

        # Query range first, then subject range, so the appended columns
        # keep the order qstart, qend, sstart, send.
        for source, lo_name, hi_name in (('qrange', 'qstart', 'qend'),
                                         ('srange', 'sstart', 'send')):
            split = df[source].str.partition('..')
            df[lo_name] = split[0].astype(int)
            df[hi_name] = split[2].astype(int)
            del df[source]

        if remap:
            remap_blast(df)
        yield df

Esempio n. 5

0

Mostra file

File: parsers.py Progetto: macmanes-lab/dammit

def blast_to_df_iter(fn, delimiter='\t', chunksize=10000, remap=False):
    '''Yield DataFrames of up to `chunksize` hits parsed from an
    NCBI BLAST+ `-outfmt6` file.

    Columns are named after `blast_cols`, and each chunk is passed to
    `convert_dtypes` with the mapping built from `blast_cols`.

    Args:
        fn (str): The results file.
        delimiter (str): Field separator used in the file.
        chunksize (int): Hits per iteration.
        remap (bool): If true, pass each chunk through `remap_blast`
            before yielding it.
    Yields:
        DataFrame: Pandas DataFrame with the BLAST+ hits.
    '''

    names = [col for col, _ in blast_cols]
    chunks = pd.read_table(fn, header=None, skipinitialspace=True,
                           names=names, delimiter=delimiter,
                           chunksize=chunksize)

    for df in chunks:
        # Return values are ignored: both helpers are expected to modify
        # the chunk in place.
        convert_dtypes(df, dict(blast_cols))
        if remap:
            remap_blast(df)
        yield df

Esempio n. 6

0

Mostra file

def blast_to_df_iter(fn, delimiter='\t', chunksize=10000, remap=False):
    '''Stream an NCBI BLAST+ `-outfmt6` results file as DataFrame chunks.

    The file is read lazily, `chunksize` rows at a time, with the column
    names taken from `blast_cols`. Each chunk is handed to
    `convert_dtypes` together with the mapping built from `blast_cols`.

    Args:
        fn (str): The results file.
        delimiter (str): Field separator used in the file.
        chunksize (int): Hits per iteration.
        remap (bool): If true, pass each chunk through `remap_blast`
            before yielding it.
    Yields:
        DataFrame: Pandas DataFrame with the BLAST+ hits.
    '''

    column_names = [name for name, _ in blast_cols]
    reader = pd.read_table(fn,
                           header=None,
                           skipinitialspace=True,
                           names=column_names,
                           delimiter=delimiter,
                           chunksize=chunksize)

    for chunk in reader:
        convert_dtypes(chunk, dict(blast_cols))
        if remap:
            remap_blast(chunk)
        yield chunk

Esempio n. 7

0

Mostra file

def crb_to_df_iter(fn, chunksize=10000, remap=True):
    '''Stream a crb-blast (CRBB, version 0.6.6) results file as
    DataFrame chunks.

    crb-blast uses BLAST+ under the hood, so its output receives the
    same treatment: with `remap` enabled, `remap_blast` is applied to
    each chunk to convert to 0-based, half-open intervals.

    The tab-separated file is read lazily, `chunksize` rows at a time,
    with column names taken from `crb_cols`. The `qrange` and `srange`
    columns, written as `start..end`, are split into the integer columns
    `qstart`/`qend` and `sstart`/`send`, and the original range columns
    are dropped.

    Args:
        fn (str): The results file.
        chunksize (int): Hits per iteration.
        remap (bool): If true, pass each chunk through `remap_blast`
            before yielding it.
    Yields:
        DataFrame: Pandas DataFrame with the CRBB hits.
    '''

    # (range column, new start column, new end column); processed in this
    # order so the new columns are appended in the same sequence as before.
    range_specs = (('qrange', 'qstart', 'qend'),
                   ('srange', 'sstart', 'send'))

    column_names = [name for name, _ in crb_cols]
    reader = pd.read_table(fn,
                           header=None,
                           names=column_names,
                           delimiter='\t',
                           chunksize=chunksize)

    for chunk in reader:
        convert_dtypes(chunk, dict(crb_cols))

        for range_col, start_col, end_col in range_specs:
            # partition() yields three columns: before, separator, after.
            pieces = chunk[range_col].str.partition('..')
            chunk[start_col] = pieces[0].astype(int)
            chunk[end_col] = pieces[2].astype(int)
            del chunk[range_col]

        if remap:
            remap_blast(chunk)
        yield chunk

Esempio n. 8

0

Mostra file

File: parsers.py Progetto: camillescott/dammit

def blast_to_df_iter(fn, delimiter='\t', chunksize=10000, remap=True):
    '''Yield DataFrames of up to `chunksize` hits parsed from an
    NCBI BLAST+ `-outfmt6` file.

    Columns are named after `blast_cols`, and each chunk is passed to
    `convert_dtypes` with the mapping built from `blast_cols`.

    Per the original author's notes, BLAST+ reports coordinates with
    start >= 1 and swaps the start/end order for hits on the negative
    strand; with `remap` enabled, `remap_blast` is applied to each chunk
    to convert to 0-based, half-open intervals.

    Args:
        fn (str): The results file.
        delimiter (str): Field separator used in the file.
        chunksize (int): Hits per iteration.
        remap (bool): If true, pass each chunk through `remap_blast`
            before yielding it.
    Yields:
        DataFrame: Pandas DataFrame with the BLAST+ hits.
    '''

    names = [col for col, _ in blast_cols]
    chunks = pd.read_table(fn, header=None, skipinitialspace=True,
                           names=names, delimiter=delimiter,
                           chunksize=chunksize)

    for df in chunks:
        # Return values are ignored: both helpers are expected to modify
        # the chunk in place.
        convert_dtypes(df, dict(blast_cols))
        if remap:
            remap_blast(df)
        yield df