Python ConvertFields Examples

Programming Language: Python

Namespace/Package Name: cogent.parse.table

Class/Type: ConvertFields

Examples at hotexamples.com: 6

Python ConvertFields - 6 examples found. These are the top rated real world Python examples of cogent.parse.table.ConvertFields extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

ConvertFields(6)

Frequently Used Methods

ConvertFields (6)

Example #1

Show file

File: infernal.py Project: mikerobeson/pycogent

def CmalignScoreParser(lines):
    """Parse the score table from standard cmalign output.

        - IMPORTANT: Only standard cmalign output is supported.

        - NOTE: Only result files produced with a single CM as the query
            are handled; concatenated multiple alignment result files are
            not.

        - Each hit carries its fields in this order:
        [seq idx, seq name, seq len, total bit score, struct bit score,
            avg prob, elapsed time]

    Reading stops at the first line starting with '# STOCKHOLM 1.0'.
    Before that point, blank lines and lines starting with '#' are dropped.
    The retained lines are handed, unstripped, to a whitespace-separated
    SeparatorFormatParser, and its result is returned.
    """
    # Cast seq idx and seq len to int; total bit score, struct bit score
    # and avg prob to float.  Seq name and elapsed time stay as strings.
    score_converter = ConvertFields(
        [(0, int), (2, int), (3, float), (4, float), (5, float)])

    data_lines = []
    for raw in lines:
        stripped = raw.strip()
        # The alignment section follows the score table; nothing past this
        # marker is score data.
        if stripped.startswith('# STOCKHOLM 1.0'):
            break
        # Skip blank lines and comment lines.
        if not stripped or stripped.startswith('#'):
            continue
        data_lines.append(raw)

    parse_scores = SeparatorFormatParser(with_header=False,
                                         converter=score_converter,
                                         ignore=None,
                                         sep=None)
    return parse_scores(data_lines)

Example #2

Show file

File: infernal.py Project: mikerobeson/pycogent

def CmsearchParser(lines):
    """Parse a cmsearch result written in tabfile format.

        - IMPORTANT: Standard cmsearch output is not supported.  Run
            cmsearch with --tabfile to produce the format this parser
            expects.

        - NOTE: Only result files produced with a single CM as the query
            are handled; concatenated multiple search result files are not.

        - Each hit carries its fields in this order:
        [target name, target start, target stop, query start, query stop,
            bit score, E-value, GC%]

    Lines starting with '#' are dropped.  All other lines are handed to a
    whitespace-separated SeparatorFormatParser, and its result is returned.

    NOTE(review): the converter below addresses column indices up to 8
    (nine columns) while the list above names eight fields.  Presumably a
    leading column is missing from the list -- confirm against real
    cmsearch --tabfile output.
    """
    # Converting indices and %GC to integers and bit score to float.
    # E-value is only present if the CM is calibrated, so it stays a string.
    hit_converter = ConvertFields(
        [(2, int), (3, int), (4, int), (5, int), (6, float), (8, int)])

    # Drop comment lines.
    data_lines = [raw for raw in lines if not raw.startswith('#')]

    parse_hits = SeparatorFormatParser(with_header=False,
                                       converter=hit_converter,
                                       ignore=None,
                                       sep=None)
    return parse_hits(data_lines)

Example #3

Show file

File: psl.py Project: yatisht/pycogent

                line.append('')

    header = []
    for t, b in zip(*lines):
        if t.strip().endswith('-'):
            c = t.strip() + b
        else:
            c = ' '.join([t.strip(), b.strip()])
        header += [c.strip()]
    return header


def int_series(x):
    """Return the integers found in a comma/whitespace separated string.

    Commas are treated as whitespace, so trailing commas and mixed
    separators are accepted; an empty string gives an empty list.

    A real list is built rather than returning ``map(...)``: on Python 3
    ``map`` yields a one-shot lazy iterator, which cannot be indexed,
    compared or iterated more than once.

    Raises ValueError if any token is not a valid integer literal.
    """
    return [int(token) for token in x.replace(',', ' ').split()]

# Columns 0-7, 10-12 and 14-17 are cast to int (that is, 0-17 except
# 8, 9 and 13); columns 18-20 are parsed with int_series.
row_converter = ConvertFields(
    [(i, int) for i in range(18) if i not in (8, 9, 13)]
    + [(i, int_series) for i in range(18, 21)]
)


def MinimalPslParser(data, row_converter=row_converter):
    """returns version, header and rows from data"""
    if type(data) == str:
        data = open(data)

    psl_version = None
    header = None
    rows = []

    for record in data:
        if psl_version is None:
            assert 'psLayout version' in record

Example #4

Show file

File: bowtie.py Project: yatisht/pycogent

"""
from cogent import LoadTable
from cogent.parse.table import ConvertFields

# Module metadata: authorship, copyright, credits, licence, version,
# maintainer contact and development status.
__author__ = "Gavin Huttley, Anuj Pahwa"
__copyright__ = "Copyright 2007-2016, The Cogent Project"
__credits__ = ["Rob Knight", "Peter Maxwell", "Gavin Huttley", "Anuj Pahwa"]
__license__ = "GPL"
__version__ = "1.9"
__maintainer__ = "Gavin Huttley"
__email__ = "*****@*****.**"
__status__ = "Development"

# Default converter for a row of bowtie output: the 4th and 7th elements
# (indices 3 and 6) hold integer values, so they are cast to int.
row_converter = ConvertFields([
    (3, int),
    (6, int),
])


def BowtieOutputParser(data, row_converter=row_converter):
    """yields a header and row of data from the default bowtie output
    
    Arguments:
        - row_converter: if not provided, uses a default converter which casts
          the Offset and Other Matches fields to ints. If set to None, all
          returned data will be strings (this is faster).
    """

    header = [
        'Query Name', 'Strand Direction', 'Reference Name', 'Offset',
        'Query Seq', 'Quality', 'Other Matches', 'Mismatches'
    ]

Example #5

Show file

File: sam.py Project: cameron-jack/Chippy

    return val


def get_strand(val):
    """Return the strand encoded in a flag value: -1 if bit 16 is set, else 1.

    val is converted with int() first, so numeric strings are accepted.
    """
    flag = int(val)
    if flag & 16:
        return -1
    return 1


def zero_based(val):
    """Convert val to an int and shift it down by one (1-based -> 0-based)."""
    one_based = int(val)
    return one_based - 1


# Field indices below follow the SAM column order:
# QNAME, FLAG, RNAME, POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL, OPT

# Strict variant: FLAG, POS and MAPQ are plain int casts; CIGAR goes
# through _strict_cigar_span.
strict_converter = ConvertFields([
    (1, int),
    (3, int),
    (4, int),
    (5, _strict_cigar_span),
])

# Default variant: FLAG becomes a strand (1/-1), POS becomes zero-based,
# MAPQ goes through _int_str and CIGAR through _cigar_span.
converter = ConvertFields([
    (1, get_strand),
    (3, zero_based),
    (4, _int_str),
    (5, _cigar_span),
])

# Complete variant: every one of the 12 columns gets an explicit converter.
complete_converter = ConvertFields([
    (0, str),
    (1, get_strand),
    (2, str),
    (3, zero_based),
    (4, int),
    (5, _cigar_span),
    (6, str),
    (7, int),
    (8, int),
    (9, str),
    (10, str),
    (11, str),
])


def MinimalSamParser(data, converter=converter):
    """returns records from a sam file

    NOTE: the default converter turns the 1-based numbering of POS into

Example #6

Show file

File: bed.py Project: cameron-jack/Chippy

    """ returns 1/-1 for strand in place of '+' or '-' """
    strand = [-1, 1][val == '+']
    return strand


# Matches the first run of characters drawn from: digits, ',', 'X', 'Y',
# 'M' and 'T'.
# NOTE(review): this is a single character class, so it accepts any mix of
# those characters (including commas), not just the tokens X / Y / MT --
# confirm that is intended.
pattern = re.compile(r'[0-9,X,Y,MT]+')


def _get_chrom(val):
    """ returns the chromosome identifier found in val, as a string

    The result is the first substring of val matched by the module-level
    pattern; it is not converted to int.  If nothing matches, search()
    returns None and the attribute access raises AttributeError.
    """
    found = pattern.search(val)
    return found.group(0)


# BED3 columns: chrom, chromStart, chromEnd
bed3_converter = ConvertFields([
    (0, _get_chrom),
    (1, int),
    (2, int),
])

# BED6 appends: Name, score, strand
converter = ConvertFields([
    (0, _get_chrom),
    (1, int),
    (2, int),
    (3, str),
    (4, int),
    (5, _get_strand),
])

# BED12 appends: thickStart, thickEnd, itemRgb, blockCount, blockSizes,
# blockStarts
# NOTE(review): columns 8, 10 and 11 are passed to tuple(); if those fields
# arrive as str this yields a tuple of single characters (commas included)
# rather than a tuple of numbers -- confirm that is intended.
complete_converter = ConvertFields([
    (0, _get_chrom),
    (1, int),
    (2, int),
    (3, str),
    (4, int),
    (5, _get_strand),
    (6, int),
    (7, int),
    (8, tuple),
    (9, int),
    (10, tuple),
    (11, tuple),
])


def MinimalBedParser(data, converter=converter):
    """returns data lines from a BED file