コード例 #1
0
    def test_parsers(self):
        """DelimitedSplitter should return function with correct behavior

        Exercises the default splitter, an explicit whitespace splitter, and
        ';' splitters with positive, unlimited (None) and negative split
        counts, including empty and single-field inputs.
        """
        default_split = DelimitedSplitter()
        whitespace_split = DelimitedSplitter(None)
        first_semi = DelimitedSplitter(';')
        two_semis = DelimitedSplitter(';', 2)
        every_semi = DelimitedSplitter(';', None)
        final_semi = DelimitedSplitter(';', -1)
        final_two_semis = DelimitedSplitter(';', -2)

        # No arguments: one split on whitespace; blank input gives no fields.
        self.assertEqual(default_split('a   b  c'), ['a', 'b  c'])
        self.assertEqual(default_split('abc'), ['abc'])
        self.assertEqual(default_split('   '), [])

        # Passing None explicitly is the same as passing nothing.
        self.assertEqual(default_split('a  b  c'), whitespace_split('a  b  c'))

        # Positive counts split from the left; each field is stripped.
        self.assertEqual(
            first_semi('  a  ; b   ;  c  d'), ['a', 'b   ;  c  d'])
        self.assertEqual(
            two_semis('  a  ; b   ;  c  d'), ['a', 'b', 'c  d'])

        # None splits everywhere; negative counts split from the right.
        ragged = ' a ;  b  ; c;;d;e  ;'
        self.assertEqual(
            every_semi(ragged), ['a', 'b', 'c', '', 'd', 'e', ''])
        self.assertEqual(final_semi(ragged), ['a ;  b  ; c;;d;e', ''])
        self.assertEqual(final_two_semis(ragged), ['a ;  b  ; c;;d', 'e', ''])

        # Fewer delimiters than requested splits is not an error.
        self.assertEqual(final_two_semis(''), [])
        self.assertEqual(final_two_semis('x'), ['x'])
        self.assertEqual(final_two_semis('x;'), ['x', ''])
コード例 #2
0
ファイル: greengenes.py プロジェクト: yatisht/pycogent
def DefaultDelimitedSplitter(delimiter):
    """Return a splitter on ``delimiter`` that pads one-field results.

    The returned callable splits a line with DelimitedSplitter; when exactly
    one field comes back, an empty string is appended so the caller gets two
    items. Results of any other length are returned unchanged.
    """
    split_line = DelimitedSplitter(delimiter=delimiter)

    def split_with_default(line):
        fields = split_line(line)
        # A lone field means nothing followed the delimiter: supply ''.
        if len(fields) == 1:
            fields.append('')
        return fields

    return split_with_default
コード例 #3
0
 def test_splitter(self):
     """StrictFieldWrapper with splitter should use that splitter

     The splitter cuts at the last ':' only, so earlier colons stay inside
     the first field.
     """
     split_last_colon = DelimitedSplitter(':', -1)
     wrap = StrictFieldWrapper(['label', 'count'], split_last_colon)

     # Everything before the final ':' is the label; fields are stripped.
     observed = wrap('n:k:n:a:sd  ')
     self.assertEqual(observed, {'label': 'n:k:n:a', 'count': 'sd'})

     # A trailing delimiter yields an empty second field.
     observed = wrap('nknasd:')
     self.assertEqual(observed, {'label': 'nknasd', 'count': ''})

     # An empty line cannot fill both fields.
     self.assertRaises(FieldError, wrap, '')
コード例 #4
0
    def test_full_LOC(self):
        """LOC should behave as expected when initialized with rich data

        The same lines, splitter and handler map are used twice: with
        strict=True the constructor must raise FieldError when called, and
        with strict=False it must build the expected record.
        """

        class rec(MappedRecord):
            Required = {'abc': []}

        lines = [
            "abc\t def",
            " 3 \t n",
            "  abc   \txyz\n\n",
            "x\t5",
            "fgh   ",
            "x\t3    ",
        ]
        handlers = {'abc': list_adder, 'x': int_setter, 'fgh': bool_setter}
        tab_splitter = DelimitedSplitter('\t')

        # Strict construction rejects this input.
        strict_loc = LineOrientedConstructor(
            lines, tab_splitter, handlers, rec, True)
        self.assertRaises(FieldError, strict_loc)

        # Non-strict construction accepts it and returns a rec instance.
        lenient_loc = LineOrientedConstructor(
            lines, tab_splitter, handlers, rec, False)
        built = lenient_loc()
        assert isinstance(built, rec)
        self.assertEqual(
            built,
            {'abc': ['def', 'xyz'], '3': 'n', 'fgh': False, 'x': 3})
コード例 #5
0
    """Checks if x is blank."""
    return (not x) or x.isspace()


# Record finders: each is a LabeledRecordFinder built from a label predicate,
# with is_blank passed as the ``ignore`` filter.
# NOTE(review): presumably each finder groups input lines into records that
# start at a label line and drops blank lines -- confirm against
# LabeledRecordFinder.
CutgSpeciesFinder = LabeledRecordFinder(is_cutg_species_label, ignore=is_blank)

CutgFinder = LabeledRecordFinder(is_cutg_label, ignore=is_blank)

# List of 64 codon strings (RNA alphabet, 'U' not 'T') in a fixed order.
# NOTE(review): the order looks grouped by encoded amino acid with the stop
# codons last, and presumably matches the order of counts in the parsed
# records -- confirm against the file format before changing it.
codon_order = "CGA CGC CGG CGU AGA AGG CUA CUC CUG CUU UUA UUG UCA UCC UCG UCU AGC AGU ACA ACC ACG ACU CCA CCC CCG CCU GCA GCC GCG GCU GGA GGC GGG GGU GUA GUC GUG GUU AAA AAG AAC AAU CAA CAG CAC CAU GAA GAG GAC GAU UAC UAU UGC UGU UUC UUU AUA AUC AUU AUG UGG UAA UAG UGA".split(
)

# NOTE: the following field order omits Locus/CDS (the first field), which
# needs further processing. Use zip(field_order, fields[1:]) and handle the
# first field specially.
field_order = "GenBank Location Length GenPept Species Description".split()

# Splitter on ':' with a split count of -1.
# NOTE(review): presumably this cuts at the last ':' only, separating the
# species name from the gene count in a species label -- confirm against
# DelimitedSplitter.
species_label_splitter = DelimitedSplitter(':', -1)


def CutgSpeciesParser(infile, strict=True, constructor=CodonUsage):
    """Yields successive sequences from infile as CodonUsage objects.

    If strict is True (default), raises RecordError when label or seq missing.
    """
    if not strict:  #easier to see logic without detailed error handling
        for rec in CutgSpeciesFinder(infile):
            try:
                label, counts = rec
                if not is_cutg_species_label(label):
                    continue
                species, genes = species_label_splitter(label)
                info = Info({'Species': species, 'NumGenes': int(genes)})
コード例 #6
0
# Module metadata: authorship, licensing, version and maintenance status.
__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Rob Knight"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Rob Knight"
__email__ = "*****@*****.**"
__status__ = "Development"

def ll_start(line):
    """Report whether ``line`` opens a LocusLink record.

    A record start is any line that begins with the two characters '>>';
    leading whitespace is not skipped.
    """
    record_marker = '>>'
    return line.startswith(record_marker)
# Record finder that uses ll_start (lines beginning with '>>') as its label
# predicate.
LLFinder = LabeledRecordFinder(ll_start)

# Shared field splitters. NOTE(review): split-count semantics below follow
# DelimitedSplitter's documented tests (None = split at every delimiter,
# omitted = split once at the first delimiter) -- confirm if that changes.
pipes = DelimitedSplitter('|', None)  # every '|'
first_pipe = DelimitedSplitter('|')  # first '|' only
commas = DelimitedSplitter(',', None)  # every ','
first_colon = DelimitedSplitter(':', 1)  # explicit count of 1: first ':' only

# Names the '|'-separated pieces of an accession line.
accession_wrapper = FieldWrapper(['Accession', 'Gi', 'Strain'], pipes)


def _read_accession(line):
    """Parse one accession line into a MappedRecord.

    Expected line format: Accession | Gi | Strain.
    """
    named_fields = accession_wrapper(line)
    return MappedRecord(named_fields)

# Names the '|'-separated pieces of a RELL line.
rell_wrapper = FieldWrapper(
    ['Description', 'Id', 'IdType', 'Printable'], pipes)


def _read_rell(line):
    """Parse one RELL line into a MappedRecord.

    Expected line format: Description|Id|IdType|Printable.
    """
    named_fields = rell_wrapper(line)
    return MappedRecord(named_fields)

# Field wrapper for lines with five pieces split by ``pipes``, named
# Accession, Gi, Strain, Start and End in that order.
accnum_wrapper = FieldWrapper(['Accession','Gi','Strain','Start','End'], pipes)
コード例 #7
0
ファイル: clustal.py プロジェクト: mikerobeson/pycogent
        else:
            result[key] = [val]
            labels.append(key)
    return result, labels


def is_clustal_seq_line(line):
    """Returns True if line looks like a sequence line of an alignment file.

    A sequence line is non-empty, starts with a non-whitespace character, and
    does not start with either header keyword, 'CLUSTAL' or 'MUSCLE'.

    Always returns a real bool: an empty line gives False rather than the
    empty string itself, so the result can be stored or compared as well as
    used for filtering other lines out of the file.
    """
    if not line:  # empty input: there is no first character to inspect
        return False
    if line[0].isspace():
        return False
    # str.startswith accepts a tuple, so one call rejects both header lines.
    return not line.startswith(('CLUSTAL', 'MUSCLE'))


# Splitter with delimiter None and a split count of -1.
# NOTE(review): presumably this splits once at the last run of whitespace,
# giving [everything before, last token] -- confirm against DelimitedSplitter.
last_space = DelimitedSplitter(None, -1)


def delete_trailing_number(line):
    """Deletes trailing number from a line.

    The line is split on whitespace; if the last token parses as an integer,
    the remaining tokens are returned joined by single spaces.

    WARNING: does not preserve internal whitespace when a number is removed!
    (converts each whitespace run to a single space). Returns the original
    line unchanged if it didn't end in a number, including when the line is
    empty or contains only whitespace.
    """
    pieces = line.split()
    if not pieces:  # empty/blank line: no last token, so nothing to delete
        return line
    try:
        int(pieces[-1])
    except ValueError:  # last token is not a number
        return line
    return ' '.join(pieces[:-1])