def RnaPlotParser(lines):
    """Parse RNAplot postscript lines into (sequence, coordinates, pairs).

    lines: iterable of postscript lines from RNAplot output.

    Returns ('', [], []) when lines is empty.
    """
    sequence = ''
    coordinates = []
    pairs = []
    if lines:
        # Successively carve the input at the /sequence, /coor and /pairs
        # labels; each finder yields [before-label, label-onward].
        split_on_sequence = LabeledRecordFinder(
            is_label_line=lambda x: x.startswith('/sequence'))
        _, seq_onward = list(split_on_sequence(lines))

        split_on_coord = LabeledRecordFinder(
            is_label_line=lambda x: x.startswith('/coor'))
        sequence_block, coord_onward = list(split_on_coord(seq_onward))

        split_on_pairs = LabeledRecordFinder(
            is_label_line=lambda x: x.startswith('/pairs'))
        coordinate_block, pairs_block = list(split_on_pairs(coord_onward))

        sequence = get_sequence(sequence_block)
        coordinates = get_coordinates(coordinate_block)
        pairs = get_pairs(pairs_block)
    return sequence, coordinates, pairs
def extractCommandLineData(command_block):
    """Return MEME command-line summary data as a list of strings.

    command_block: lines of the MEME "COMMAND LINE SUMMARY" section.

    The lines from the "model:" label up to (but excluding) the last four
    lines of the block are returned unparsed.
    """
    # Separator lines in MEME output are runs of asterisks; skip them.
    ignore = lambda x: x.startswith('*')
    meme_model = LabeledRecordFinder(lambda x: 'model:' in x, ignore=ignore)
    cmd_data = list(meme_model(command_block))
    # First record is everything before the "model:" label; keep the rest.
    cmd_data = cmd_data[1]
    # The trailing four lines are not command-line data — drop them.
    cmd_data = cmd_data[:-4]
    # Just return list of strings rather than parse data.
    return cmd_data
def MicroarrayParser(lines):
    """Returns tuple: ([ProbeNames],[GeneNames],[LogRatios]) for all dots
    in microarray file.
    """
    probes = []
    genes = []
    ratios = []
    # Empty input yields three empty lists.
    if lines:
        # Keep only the record starting at the FEATURES label; the first
        # record (everything before it) is discarded.
        finder = LabeledRecordFinder(lambda x: x.startswith('FEATURES'))
        block = list(finder(lines))[1]
        # The header row fixes the column positions of the wanted fields.
        header = block[0].split('\t')
        probe_col = header.index('ProbeName')
        gene_col = header.index('GeneName')
        ratio_col = header.index('LogRatio')
        for row in block[1:]:
            fields = row.split('\t')
            probes.append(fields[probe_col].upper())
            genes.append(fields[gene_col].upper())
            ratios.append(float(fields[ratio_col]))
    return (probes, genes, ratios)
def get_motif_p_value(lines):
    """Return the motif p-value parsed from a motif block.

    The last record after a 'Log Motif' label line carries the log of the
    motif p-value as the final whitespace-separated token of its first line.
    """
    finder = LabeledRecordFinder(lambda x: x.startswith('Log Motif'))
    log_block = list(finder(lines))[-1]
    log_p = float(log_block[0].split()[-1])
    # Undo the log to recover the p-value itself.
    return exp(log_p)
def get_motif_sequences(lines):
    """Returns list of tuples with motif sequence information given motif block.

        - result is list of tuples :
            [(seq_num, motif_start, motif_seq, motif_sig),]
    """
    motif_list = []
    # The motif table is the last record following a line containing 'columns'.
    motif_seq_finder = LabeledRecordFinder(lambda x: 'columns' in x)
    motifs = list(motif_seq_finder(lines))[-1]
    # Skip the first two lines of the record (label + header); table rows
    # contain a comma — presumably in the sequence-number field (TODO confirm
    # against Gibbs output format).
    for m in motifs[2:]:
        if ',' in m:
            curr = m.strip().split()
            motif_num = curr[1]
            seq_num = curr[0].split(',')[0]
            # Positions in the report are 1-based; convert to 0-based.
            motif_start = int(curr[2]) - 1
            #If motif does not start at beginning of sequence:
            if motif_start > 0:
                # Field 3 is leading context, field 4 is the motif itself.
                motif_seq = curr[4]
            #Motif starts at beginning of sequence, no context before motif
            else:
                motif_seq = curr[3]
            motif_sig = float(curr[-3])
            motif_list.append(
                (seq_num, motif_start, motif_seq, motif_sig, motif_num))
        else:
            # First comma-less line ends the motif table.
            break
    return motif_list
def getSummaryBlock(module_blocks):
    """Return the SUMMARY-of-motifs record from module_blocks.

    The first record (everything before the SUMMARY label) is discarded.
    """
    finder = LabeledRecordFinder(
        lambda x: x.startswith('SUMMARY'),
        constructor=None,
        ignore=lambda x: x.startswith(' '))
    return list(finder(module_blocks))[1]
def get_sequence_and_motif_blocks(lines):
    """Split Gibbs output into (sequence block, motif block).

    The split point is the line containing 'MAP MAXIMIZATION RESULTS'.
    """
    splitter = LabeledRecordFinder(
        lambda x: 'MAP MAXIMIZATION RESULTS' in x)
    sequence_part, motif_part = list(splitter(lines))
    return sequence_part, motif_part
def getDataBlock(lines):
    """Return (main data block, alphabet) from MEME output lines.

    The main data block is everything following "COMMAND LINE SUMMARY";
    the alphabet (molecule type) is read from the block preceding it.
    """
    splitter = LabeledRecordFinder(lambda x: x.startswith('COMMAND'))
    blocks = list(splitter(lines))
    alphabet = getMolType(blocks[0])
    return blocks[1], alphabet
def getModuleDataBlocks(module_blocks):
    """Return a list of data-block lists, one per module.

    Each module is split at its 'Motif' label lines.
    """
    splitter = LabeledRecordFinder(lambda x: x.startswith('Motif'))
    return [list(splitter(module)) for module in module_blocks]
def lazy_parse_sff_handle(handle):
    """Return (flowgram generator, header) for an sff.txt handle.

    Flowgrams are produced one at a time, so the file is never fully
    materialized in memory.
    """
    record_finder = LabeledRecordFinder(is_fasta_label, constructor=strip)
    records = record_finder(handle)
    # The first record from the finder is the file header.
    header = get_header_info(next(records))
    return (_sff_parser(records, header), header)
def getCommandModuleBlocks(main_block):
    """Return (command line summary block, list of module blocks).

    main_block is split at each 'MOTIF' label; the leading record is the
    command-line summary and any remaining records are module blocks.
    """
    splitter = LabeledRecordFinder(lambda x: x.startswith('MOTIF'))
    records = list(splitter(main_block))
    command_block = records[0]
    # No MOTIF labels present -> no module blocks.
    module_blocks = records[1:] if len(records) > 1 else []
    return command_block, module_blocks
def test_parsers_ignore(self):
    """LabeledRecordFinder should skip lines to ignore."""
    def never(line):
        # Ignore nothing: every line is kept, including blanks.
        return False
    def ignore_labels(line):
        # Ignore blank lines and '#' comment lines.
        return (not line) or line.isspace() or line.startswith('#')
    def is_start(line):
        # Records start at '>' label lines.
        return line.startswith('>')
    lines = ['>abc','\n','1','>def','#ignore','2']
    # Default ignore drops the blank line but keeps the '#' line.
    self.assertEqual(list(LabeledRecordFinder(is_start)(lines)),
        [['>abc', '1'],['>def','#ignore','2']])
    # With never(), nothing is dropped; the blank line appears as ''
    # (presumably stripped by the finder's default constructor —
    # TODO confirm).
    self.assertEqual(list(LabeledRecordFinder(is_start,
        ignore=never)(lines)),
        [['>abc', '', '1'],['>def','#ignore','2']])
    # ignore_labels drops both the blank line and the '#' comment.
    self.assertEqual(list(LabeledRecordFinder(is_start,
        ignore=ignore_labels)(lines)),
        [['>abc','1'],['>def','2']])
def RnaFoldParser(lines):
    """Returns a tuple containing sequence and dot plot indices.

       (sequence, (index1, index2, pair probability))
    """
    sequence = ''
    indices = []
    # Empty input yields the empty defaults.
    if lines:
        finder = LabeledRecordFinder(lambda x: x.startswith('/sequence'))
        # Only the second record — from '/sequence' onward — is of interest.
        seq_block = list(finder(lines))[1]
        sequence = getSequence(seq_block)
        indices = getIndices(seq_block)
    return (sequence, indices)
def get_sequence_map(lines):
    """Returns dict mapping Gibbs sequence number to sequence ID.

        - ex: sequence numbers mapping to gis:
            {'1':'1091044', '2':'11467494', '3':'11499727'}
    """
    sequence_map = {}
    finder = LabeledRecordFinder(
        lambda x: x.startswith('Sequences to be Searched:'))
    block = list(finder(lines))[-1]
    # Entries start two lines past the label; the table ends at the first
    # line that does not begin with '#'.
    for entry in block[2:]:
        if not entry.startswith('#'):
            break
        num, label = entry.strip().split(' ', 1)
        # Key is the sequence number with its leading '#' removed.
        sequence_map[num.strip()[1:]] = label.strip()
    return sequence_map
def get_summaries(handle, number_list=None, name_list=None, all_sums=False):
    """Returns specified flowgrams and sequence summaries as generator

    handle can be a list of lines or a file handle

    number_list is a list of the summaries wanted by their index in the sff
    file, starts at 0

    name_list is a list of the summaries wanted by their name in the sff file

    all_sums if true will yield all the summaries in the order they appear
    in the file

    One and only one of the parameters must be set
    """
    sff_info = LabeledRecordFinder(is_fasta_label, constructor=strip)
    sum_gen = sff_info(handle)
    if number_list:
        assert not (name_list or all_sums)
        num = len(number_list)
        for i, s in enumerate(sum_gen):
            # i - 1: the finder's first record is the file header, so
            # summary j of the file is record j + 1 from the generator.
            if i - 1 in number_list:
                yield s
                num -= 1
                # Stop as soon as every requested index has been served.
                if num == 0:
                    break
    elif name_list:
        assert not all_sums
        for s in sum_gen:
            # First line of each summary is its '>name' label.
            if s[0].strip('>') in name_list:
                yield s
    elif all_sums:
        # Skip the header record, then yield everything else in order.
        header = True
        for s in sum_gen:
            if header:
                header = False
                continue
            yield s
    else:
        raise ValueError(
            "number_list, name_list or all_sums must be specified")
def get_rnaplot_postscript(sequence, struct):
    """Return (prefix, suffix) postscript strings for seq and struct.

    A '%PreTextHere' marker is injected via RNAplot's --pre option and the
    output is split at that marker; drawpairs/drawoutline commands are
    removed from the suffix.
    """
    # Params for RNAplot.
    rnaplot_params = {'-t': '0', '--pre': '%PreTextHere'}
    ps_lines = plot_from_seq_and_struct(
        sequence, struct, params=rnaplot_params).split('\n')
    # Split the postscript at the injected marker.
    marker_finder = LabeledRecordFinder(
        is_label_line=lambda x: x.startswith('%PreTextHere'))
    prefix, suffix = list(marker_finder(ps_lines))
    # Drop drawpairs and drawoutline commands from the suffix.
    kept = [line for line in suffix
            if not line.startswith(('drawpairs', 'drawoutline'))]
    return '\n'.join(prefix), '\n'.join(kept)
def is_gde_label(x): """Checks if x looks like a GDE label line.""" return x and x[0] in '%#' def is_blank_or_comment(x): """Checks if x is blank or a FASTA comment line.""" return (not x) or x.startswith('#') or x.isspace() def is_blank(x): """Checks if x is blank.""" return (not x) or x.isspace() FastaFinder = LabeledRecordFinder(is_fasta_label, ignore=is_blank_or_comment) def MinimalFastaParser(infile, strict=True, \ label_to_name=str, finder=FastaFinder, \ is_label=None, label_characters='>'): """Yields successive sequences from infile as (label, seq) tuples. If strict is True (default), raises RecordError when label or seq missing. """ for rec in finder(infile): #first line must be a label line if not rec[0][0] in label_characters: if strict: raise RecordError, "Found Fasta record without label line: %s"%\ rec
def setUp(self):
    """Setup function for meme tests.
    """
    #Meme output data:
    self.meme_file = MEME_FILE.split('\n')
    # Finders for the major sections of a MEME report.
    self.meme_main = LabeledRecordFinder(lambda x: x.startswith('COMMAND'))
    self.meme_command = LabeledRecordFinder(lambda x: x.startswith('MOTIF'))
    self.meme_summary = LabeledRecordFinder(lambda x: x.startswith('SUMMARY'))
    self.meme_module = LabeledRecordFinder(lambda x: x.startswith('Motif'))
    # Split the file into the alphabet header and the main data block.
    self.alphabet_block, self.main_block = \
        list(self.meme_main(self.meme_file))
    # First record is the command-line summary; the rest are module blocks.
    self.cmd_mod_list = list(self.meme_command(self.main_block))
    self.command_block = self.cmd_mod_list[0]
    self.module_blocks = self.cmd_mod_list[1:]
    # SUMMARY section lives at the end of the last module block.
    self.summary_block = \
        list(self.meme_summary(self.module_blocks[-1]))[1]
    self.module_data_blocks = []
    for module in self.module_blocks:
        self.module_data_blocks.append(
            list(self.meme_module(module)))
    #List and Dict for testing dictFromList function
    self.sample_list = ['key1',1,'key2',2,'key3',3,'key4',4]
    self.sample_dict = {'key1':1,
                        'key2':2,
                        'key3':3,
                        'key4':4,
                        }
    #List of command line data
    self.command_line_list = [
        'model: mod= tcm nmotifs= 3 evt= 1e+100',
        'object function= E-value of product of p-values',
        'width: minw= 4 maxw= 10 minic= 0.00',
        'width: wg= 11 ws= 1 endgaps= yes',
        'nsites: minsites= 2 maxsites= 50 wnsites= 0.8',
        'theta: prob= 1 spmap= uni spfuzz= 0.5',
        'em: prior= dirichlet b= 0.01 maxiter= 20',
        'distance= 1e-05',
        'data: n= 597 N= 15',
        'strands: +',
        'sample: seed= 0 seqfrac= 1',
        ]
    #List of dicts which contain general info for each module.
    self.module_info_dicts = [
        {'MOTIF':'1',
         'width':'10',
         'sites':'11',
         'llr':'131',
         'E-value':'1.3e-019',
         },
        {'MOTIF':'2',
         'width':'7',
         'sites':'11',
         'llr':'88',
         'E-value':'2.5e-006',
         },
        {'MOTIF':'3',
         'width':'7',
         'sites':'6',
         'llr':'53',
         'E-value':'5.5e-001',
         },
        ]
    #Summary dict
    # Maps each sequence ID to its combined p-value from the SUMMARY table.
    self.summary_dict = {'CombinedP':{
        '1': float(3.48e-02),
        '11': float(3.78e-05),
        '17': float(2.78e-08),
        '28': float(3.49e-06),
        '105': float(3.98e-06),
        '159': float(1.08e-02),
        '402-C01': float(4.22e-07),
        '407-A07': float(7.32e-08),
        '410-A10': float(4.23e-04),
        '505-D01': float(5.72e-07),
        '507-B04-1': float(1.01e-04),
        '518-D12': float(2.83e-06),
        '621-H01': float(8.69e-07),
        '625-H05': float(8.86e-06),
        '629-C08': float(5.61e-07),
        }
    }
    # Identity remapping of sequence IDs.
    self.remap_dict = {
        '11':'11',
        '1':'1',
        '407-A07':'407-A07',
        '17':'17',
        '159':'159',
        '505-D01':'505-D01',
        '28':'28',
        '507-B04-1':'507-B04-1',
        '402-C01':'402-C01',
        '621-H01':'621-H01',
        '629-C08':'629-C08',
        '410-A10':'410-A10',
        '105':'105',
        '625-H05':'625-H05',
        '518-D12':'518-D12'
        }
    #ModuleInstances and Modules
    # One inner list of expected ModuleInstances per MEME motif.
    self.ModuleInstances = [
        [ModuleInstance('CTATTGGGGC',Location('629-C08',18,28),
            float(1.95e-06)),
         ModuleInstance('CTATTGGGGC',Location('621-H01',45,55),
            float(1.95e-06)),
         ModuleInstance('CTATTGGGGC',Location('505-D01',26,36),
            float(1.95e-06)),
         ModuleInstance('CTATTGGGGC',Location('407-A07',5,15),
            float(1.95e-06)),
         ModuleInstance('CTATTGGGGC',Location('105',0,10),
            float(1.95e-06)),
         ModuleInstance('CTATTGGGGC',Location('28',3,13),
            float(1.95e-06)),
         ModuleInstance('CTATTGGGGC',Location('17',16,26),
            float(1.95e-06)),
         ModuleInstance('CTATTGGGCC',Location('402-C01',24,34),
            float(3.30e-06)),
         ModuleInstance('CTAGTGGGGC',Location('625-H05',2,12),
            float(5.11e-06)),
         ModuleInstance('CTAGTGGGCC',Location('11',15,25),
            float(6.37e-06)),
         ModuleInstance('CTATTGGGGT',Location('518-D12',0,10),
            float(9.40e-06)),
         ],
        [ModuleInstance('CGTTACG',Location('629-C08',37,44),
            float(6.82e-05)),
         ModuleInstance('CGTTACG',Location('621-H01',30,37),
            float(6.82e-05)),
         ModuleInstance('CGTTACG',Location('507-B04-1',8,15),
            float(6.82e-05)),
         ModuleInstance('CGTTACG',Location('410-A10',7,14),
            float(6.82e-05)),
         ModuleInstance('CGTTACG',Location('407-A07',26,33),
            float(6.82e-05)),
         ModuleInstance('CGTTACG',Location('17',0,7),
            float(6.82e-05)),
         ModuleInstance('TGTTACG',Location('625-H05',32,39),
            float(1.74e-04)),
         ModuleInstance('TGTTACG',Location('505-D01',3,10),
            float(1.74e-04)),
         ModuleInstance('CATTACG',Location('518-D12',30,37),
            float(2.14e-04)),
         ModuleInstance('CGGTACG',Location('402-C01',1,8),
            float(2.77e-04)),
         ModuleInstance('TGTTCCG',Location('629-C08',5,12),
            float(6.45e-04)),
         ],
        [ModuleInstance('CTATTGG',Location('629-C08',57,64),
            float(1.06e-04)),
         ModuleInstance('CTATTGG',Location('507-B04-1',42,49),
            float(1.06e-04)),
         ModuleInstance('CTATTGG',Location('410-A10',27,34),
            float(1.06e-04)),
         ModuleInstance('CTATTGG',Location('159',14,21),
            float(1.06e-04)),
         ModuleInstance('CTATTGG',Location('1',18,25),
            float(1.06e-04)),
         ModuleInstance('CTAATGG',Location('507-B04-1',28,35),
            float(1.63e-04)),
         ],
        ]
    # Build a Module object per motif, keyed by (sequence id, start).
    self.Modules = []
    for module, info in zip(self.ModuleInstances, self.module_info_dicts):
        curr_module_data = {}
        for instance in module:
            curr_module_data[(instance.Location.SeqId,
                              instance.Location.Start)] = instance
        temp_module = Module(curr_module_data, MolType=DNA,
                             Evalue=float(info['E-value']),
                             Llr=int(info['llr']))
        self.Modules.append(temp_module)
    # Expected consensus sequence for each of the three motifs.
    self.ConsensusSequences = ['CTATTGGGGC','CGTTACG','CTATTGG']
taxonomy = ' '.join(taxonomy.split()) #separate by semicolons taxa = map(strip, taxonomy.split(';')) #get rid of leading/trailing spaces #delete trailing period if present last = taxa[-1] if last.endswith('.'): taxa[-1] = last[:-1] return species, taxa def is_feature_component_start(line): """Checks if a line starts with '/', ignoring whitespace.""" return line.lstrip().startswith('/') feature_component_iterator = LabeledRecordFinder(is_feature_component_start) _join_with_empty = dict.fromkeys(['translation']) _leave_as_lines = {} def parse_feature(lines): """Parses a feature. Doesn't handle subfeatures. Returns dict containing: 'type': source, gene, CDS, etc. 'location': unparsed location string ...then, key-value pairs for each annotation, e.g. '/gene="MNBH"' -> {'gene':['MNBH']} (i.e. quotes stripped) All relations are assumed 'to many', and order will be preserved. """
def is_cutg_label(x): """Checks if x looks like a CUTG label line.""" return x.startswith('>') def is_cutg_species_label(x): """Checks if x looks like a CUTG label line.""" return ':' in x def is_blank(x): """Checks if x is blank.""" return (not x) or x.isspace() CutgSpeciesFinder = LabeledRecordFinder(is_cutg_species_label, ignore=is_blank) CutgFinder = LabeledRecordFinder(is_cutg_label, ignore=is_blank) codon_order = "CGA CGC CGG CGU AGA AGG CUA CUC CUG CUU UUA UUG UCA UCC UCG UCU AGC AGU ACA ACC ACG ACU CCA CCC CCG CCU GCA GCC GCG GCU GGA GGC GGG GGU GUA GUC GUG GUU AAA AAG AAC AAU CAA CAG CAC CAU GAA GAG GAC GAU UAC UAU UGC UGU UUC UUU AUA AUC AUU AUG UGG UAA UAG UGA".split( ) #NOTE: following field order omits Locus/CDS (first field), which needs further #processing. Use zip(field_order, fields[1:]) and handle first field specially. field_order = "GenBank Location Length GenPept Species Description".split() species_label_splitter = DelimitedSplitter(':', -1) def CutgSpeciesParser(infile, strict=True, constructor=CodonUsage): """Yields successive sequences from infile as CodonUsage objects.
from cogent.parse.record_finder import LabeledRecordFinder
from string import maketrans, strip

__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Rob Knight"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Rob Knight"
__email__ = "*****@*****.**"
__status__ = "Development"

def ll_start(line):
    """Returns True if line looks like the start of a LocusLink record."""
    return line.startswith('>>')

# Finds whole LocusLink records, delimited by '>>' lines.
LLFinder = LabeledRecordFinder(ll_start)

# Field splitters for the various LocusLink line formats.
pipes = DelimitedSplitter('|', None)
first_pipe = DelimitedSplitter('|')
commas = DelimitedSplitter(',', None)
first_colon = DelimitedSplitter(':', 1)

accession_wrapper = FieldWrapper(['Accession', 'Gi', 'Strain'], pipes)
def _read_accession(line):
    """Reads accession lines: format is Accession | Gi | Strain."""
    return MappedRecord(accession_wrapper(line))

rell_wrapper = FieldWrapper(['Description', 'Id', 'IdType', 'Printable'],
                            pipes)
def _read_rell(line):
    """Reads RELL lines: format is Description|Id|IdType|Printable"""
    return MappedRecord(rell_wrapper(line))
def get_all_summaries(lines):
    """Returns all the flowgrams and sequence summaries in list of lists"""
    finder = LabeledRecordFinder(is_fasta_label, constructor=strip)
    # Drop the leading header record; keep every summary.
    return list(finder(lines))[1:]
WARNING: Only maps the data type if the key is in label_constructors above. """ if not line.startswith("#"): raise ValueError, "Labels must start with a # symbol." if line.find(":") == -1: raise ValueError, "Labels must contain a : symbol." key, value = map(strip, line[1:].split(":", 1)) key = key.upper() if key in label_constructors: value = label_constructors[key](value) return key, value BlatFinder = LabeledRecordFinder(query_finder, constructor=strip, \ ignore=is_blat_junk) BlastFinder = LabeledRecordFinder(query_finder, constructor=strip, \ ignore=is_blast_junk) PsiBlastFinder = LabeledRecordFinder(iter_finder, constructor=strip, \ ignore=is_blast_junk) PsiBlastQueryFinder = LabeledRecordFinder(iteration_set_finder, \ constructor=strip, ignore=is_blast_junk) def GenericBlastParser9(lines, finder, make_col_headers=False): """Yields successive records from lines (props, data list) Infile must in blast9 format
def get_motif_blocks(lines):
    """Return the list of motif blocks from the main block lines.

    The record preceding the first MOTIF label is discarded.
    """
    splitter = LabeledRecordFinder(lambda x: 'MOTIF' in x)
    return list(splitter(lines))[1:]
elif len(attr) == 1: result.State = attr #handle line width elif attr.startswith('width'): result.Width = int(attr[5:]) else: #otherwise assume it's a color label result.Color = attr return result def _is_keyword(line): if line.startswith('@'): return True return False KeywordFinder = LabeledRecordFinder(_is_keyword) def MageParser(infile): """MageParser returns a new kinemage object, created from a string repr. infile: should be an iterable file object The MageParser works only on ONE kinemage object, so files containing more than one kinemage should be split beforehand. This can easily be adjusted if it would be useful in the future. The MageParser handles only certain keywords (@kinemage, @text, @caption, @___group, @____list) and MagePoints at this point in time. All unkown keywords are assumed to be part of the header, so you can find them in the header information. The lists that are part of the Simplex header are treated as normal lists.
def setUp(self):
    """Define a standard LabeledRecordFinder"""
    # Fasta-style records begin at '>' label lines.
    starts_record = lambda x: x.startswith('>')
    self.FastaLike = LabeledRecordFinder(starts_record)