Ejemplo n.º 1
0
 def test_init_empty(self):
     """A LOC built with no arguments should expose the expected defaults"""
     loc = LineOrientedConstructor()
     # Default line store is an empty list.
     self.assertEqual(loc.Lines, [])
     # Default label splitter breaks on whitespace and drops the padding.
     split_labels = loc.LabelSplitter(' ab  cd  ')
     self.assertEqual(split_labels, ['ab', 'cd'])
     # No field handlers, the generic record type, and non-strict mode.
     self.assertEqual(loc.FieldMap, {})
     self.assertEqual(loc.Constructor, MappedRecord)
     self.assertEqual(loc.Strict, False)
Ejemplo n.º 2
0
 def test_empty_LOC(self):
     """Default LOC should give {} for empty input and map labels otherwise"""
     loc = LineOrientedConstructor()

     # No argument, an empty list, and whitespace-only lines all give {}.
     self.assertEqual(loc(), {})
     self.assertEqual(loc([]), {})
     self.assertEqual(loc(['   ', '\n\t   ']), {})

     # Real lines: 'abc' occurs twice and ends up holding the later value,
     # while 'fgh' has no value and is expected to map to None.
     lines = ["abc   def", "3  n", "\t  abc   \txyz\n\n", "fgh   "]
     self.assertEqual(loc(lines), {'abc': 'xyz', '3': 'n', 'fgh': None})
Ejemplo n.º 3
0
    def test_full_LOC(self):
        """LOC should behave as expected when initialized with rich data

        The same lines, tab label splitter, field map and record class are
        used twice: with Strict=True calling the constructor must raise
        FieldError, and with Strict=False it must return a ``rec`` instance
        holding the mapped values.
        """
        data = [
            "abc\t def", " 3 \t n", "  abc   \txyz\n\n", "x\t5", "fgh   ",
            "x\t3    "
        ]

        class rec(MappedRecord):
            Required = {'abc': []}

        maps = {'abc': list_adder, 'x': int_setter, 'fgh': bool_setter}
        label_splitter = DelimitedSplitter('\t')

        # Strict mode: invoking the constructor is expected to raise.
        loc_bad = LineOrientedConstructor(
            data, label_splitter, maps, rec, True)
        self.assertRaises(FieldError, loc_bad)

        # Non-strict mode: the same input must produce a populated record.
        loc_good = LineOrientedConstructor(
            data, label_splitter, maps, rec, False)
        result = loc_good()
        # Use the unittest assertion rather than a bare ``assert``: bare
        # asserts are stripped under ``python -O`` and give no failure message.
        self.assertIsInstance(result, rec)
        self.assertEqual(
            result,
            {'abc': ['def', 'xyz'], '3': 'n', 'fgh': False, 'x': 3})
Ejemplo n.º 4
0
def _sts_adder(obj, field, val):
    """Parses val with _read_sts and passes the result to list_adder"""
    sts_record = _read_sts(val)
    list_adder(obj, field, sts_record)


def _seq_adder(obj, field, val):
    """Parses val with _read_seq and passes the result to list_adder"""
    seq_record = _read_seq(val)
    list_adder(obj, field, seq_record)


def _protsim_adder(obj, field, val):
    """Parses val with _read_protsim and passes the result to list_adder"""
    protsim_record = _read_protsim(val)
    list_adder(obj, field, protsim_record)


# Module-level constructor configured to build UniGene objects.
LinesToUniGene = LineOrientedConstructor()
LinesToUniGene.Constructor = UniGene
# Handler for each field label that has one registered here.
LinesToUniGene.FieldMap = dict([
    ('LOCUSLINK', int_setter),
    ('EXPRESS', _expressions_setter),
    ('PROTSIM', _protsim_adder),
    ('SCOUNT', int_setter),
    ('SEQUENCE', _seq_adder),
    ('STS', _sts_adder),
])


def UniGeneParser(lines):
    """Treats lines as a stream of unigene records"""
    for record in GbFinder(lines):
        curr = LinesToUniGene(record)
Ejemplo n.º 5
0
    """Sets specified field to a list of expressions"""
    setattr(obj, field, semi_splitter(val))

def _sts_adder(obj, field, val):
    """Parses val with _read_sts and passes the result to list_adder"""
    sts_record = _read_sts(val)
    list_adder(obj, field, sts_record)

def _seq_adder(obj, field, val):
    """Parses val with _read_seq and passes the result to list_adder"""
    seq_record = _read_seq(val)
    list_adder(obj, field, seq_record)

def _protsim_adder(obj, field, val):
    """Parses val with _read_protsim and passes the result to list_adder"""
    protsim_record = _read_protsim(val)
    list_adder(obj, field, protsim_record)
 
# Module-level constructor configured to build UniGene objects.
LinesToUniGene = LineOrientedConstructor()
LinesToUniGene.Constructor = UniGene
# Handler for each field label that has one registered here.
LinesToUniGene.FieldMap = dict([
    ('LOCUSLINK', int_setter),
    ('EXPRESS', _expressions_setter),
    ('PROTSIM', _protsim_adder),
    ('SCOUNT', int_setter),
    ('SEQUENCE', _seq_adder),
    ('STS', _sts_adder),
])

def UniGeneParser(lines):
    """Treats lines as a stream of unigene records"""
    for record in GbFinder(lines):
        curr = LinesToUniGene(record)
        del curr['//']  #clean up delimiter
Ejemplo n.º 6
0
# Register the handler used for each of these field labels (the handlers
# themselves are not shown in this excerpt).
_ll_fieldmap['GO'] = _go_adder
_ll_fieldmap['EXTANNOT'] = _extannot_adder
_ll_fieldmap['MAP'] = _map_adder
_ll_fieldmap['CDD'] = _cdd_adder
_ll_fieldmap['ASSEMBLY'] = _assembly_adder
_ll_fieldmap['CONTIG'] = _contig_adder
# NG and ACCNUM share the _accnum_adder handler.
for field in 'NG ACCNUM'.split():
    _ll_fieldmap[field] = _accnum_adder
# These labels all share the _accession_adder handler.
for field in 'NR NM NC NP XG XR XM XP PROT'.split():
    _ll_fieldmap[field] = _accession_adder
# Every label in _ll_multi that has no handler yet falls back to
# _generic_adder; the membership check keeps handlers that are already
# registered from being replaced.
for field in _ll_multi:
    if field not in _ll_fieldmap:
        _ll_fieldmap[field] = _generic_adder


# Module-level constructor configured for LocusLink: it builds LocusLink
# objects, dispatches on _ll_fieldmap, and splits labels with first_colon.
LinesToLocusLink = LineOrientedConstructor()
LinesToLocusLink.Constructor = LocusLink
LinesToLocusLink.FieldMap = _ll_fieldmap
LinesToLocusLink.LabelSplitter = first_colon

def LocusLinkParser(lines):
    """Treats lines as a stream of LocusLink records

    LLFinder groups ``lines`` into per-record chunks; each chunk is passed
    to LinesToLocusLink and the resulting object is yielded, one per record.
    """
    for record in LLFinder(lines):
        yield LinesToLocusLink(record)

if __name__ == '__main__':
    from sys import argv, stdout
    filename = argv[1]
    count = 0
    for record in LocusLinkParser(open(filename)):
Ejemplo n.º 7
0
# Register the handler used for each of these field labels (the handlers
# themselves are not shown in this excerpt).
_ll_fieldmap['GO'] = _go_adder
_ll_fieldmap['EXTANNOT'] = _extannot_adder
_ll_fieldmap['MAP'] = _map_adder
_ll_fieldmap['CDD'] = _cdd_adder
_ll_fieldmap['ASSEMBLY'] = _assembly_adder
_ll_fieldmap['CONTIG'] = _contig_adder
# NG and ACCNUM share the _accnum_adder handler.
for field in 'NG ACCNUM'.split():
    _ll_fieldmap[field] = _accnum_adder
# These labels all share the _accession_adder handler.
for field in 'NR NM NC NP XG XR XM XP PROT'.split():
    _ll_fieldmap[field] = _accession_adder
# Every label in _ll_multi that has no handler yet falls back to
# _generic_adder; the membership check keeps handlers that are already
# registered from being replaced.
for field in _ll_multi:
    if field not in _ll_fieldmap:
        _ll_fieldmap[field] = _generic_adder


# Module-level constructor configured for LocusLink: it builds LocusLink
# objects, dispatches on _ll_fieldmap, and splits labels with first_colon.
LinesToLocusLink = LineOrientedConstructor()
LinesToLocusLink.Constructor = LocusLink
LinesToLocusLink.FieldMap = _ll_fieldmap
LinesToLocusLink.LabelSplitter = first_colon

def LocusLinkParser(lines):
    """Treats lines as a stream of LocusLink records

    LLFinder groups ``lines`` into per-record chunks; each chunk is passed
    to LinesToLocusLink and the resulting object is yielded, one per record.
    """
    for record in LLFinder(lines):
        yield LinesToLocusLink(record)

if __name__ == '__main__':
    from sys import argv, stdout
    filename = argv[1]
    count = 0
    for record in LocusLinkParser(open(filename)):