def __init__(self, inStr):
    """Parse one sequence record out of ``inStr`` and populate this instance.

    ``inStr`` is matched against ``sequencePattern``. From the match:

    * the ``seqIdNo``, ``length``, ``molType``, ``organism`` and
      ``seqNo400`` groups are stored after being passed through
      ``safeStrip``;
    * every ``featurePattern`` match found in the ``featureTable`` group is
      wrapped in a ``Feature`` and appended to ``self.features``;
    * the ``residues`` group is split line by line: lines matching
      ``nucPattern`` go to ``residues_nuc``, all other lines go to
      ``residues_prt``; whitespace, commas and digits are then removed
      from both strings.

    ``mixedMode`` is True when both residue strings are non-empty and
    ``isSkipCode`` is True when both are empty. In mixed mode the protein
    residues are handed out, in feature order, to the ``CDS`` features.

    Raises:
        SeqlException: if ``inStr`` does not match ``sequencePattern``.
    """
    # Defaults; they stay in place for anything the parse does not fill in.
    self.successfullyParsed = False
    self.features = []
    self.residues_nuc = '-'
    self.residues_prt = '-'
    self.translations = []
    self.actualSeqIdNo = 0
    self.actualMolType = '-'
    self.actualLength = 0
    self.mixedMode = False
    self.isSkipCode = False

    m = sequencePattern.match(inStr)
    if not m:
        raise SeqlException('Parser failed for input:\n%s' % inStr)

    # Header fields.
    self.seqIdNo = safeStrip(m.group('seqIdNo'))
    self.length = safeStrip(m.group('length'))
    self.molType = safeStrip(m.group('molType'))
    self.organism = safeStrip(m.group('organism'))
    featureTable = safeStrip(m.group('featureTable'))
    self.seqNo400 = safeStrip(m.group('seqNo400'))

    # One Feature per featurePattern match in the feature table.
    if featureTable:
        self.features.extend(
            Feature(featureMatch)
            for featureMatch in re.finditer(featurePattern, featureTable))

    # Sort each residue line into the nucleotide or the protein bucket.
    nucLines = []
    prtLines = []
    for residueLine in m.group('residues').splitlines():
        # TODO: add more robust code -- every line that is not recognised
        # as nucleotide is currently assumed to be protein.
        bucket = nucLines if nucPattern.match(residueLine) else prtLines
        bucket.append(residueLine)

    # Drop whitespace, commas and digits, leaving only residue symbols.
    self.residues_nuc = re.sub(r'[\s,\d]', '', ''.join(nucLines))
    self.residues_prt = re.sub(r'[\s,\d]', '', ''.join(prtLines))

    hasNuc = len(self.residues_nuc) > 0
    hasPrt = len(self.residues_prt) > 0
    self.mixedMode = hasNuc and hasPrt
    self.isSkipCode = not hasNuc and not hasPrt

    if self.mixedMode:
        # Consecutive slices of residues_prt are assigned to the CDS
        # features; each slice is (range end - range start + 1) characters
        # long, the range being what su.getRangeFromLocation returns.
        sliceStart = 0
        for feature in self.features:
            if feature.key != 'CDS':
                continue
            locRange = su.getRangeFromLocation(feature.location)
            sliceEnd = sliceStart + (locRange[1] - locRange[0]) + 1
            translation = self.residues_prt[sliceStart:sliceEnd]
            sliceStart = sliceEnd
            self.translations.append(translation)
            feature.translation = translation

    self.__setActualMolType__()
    self.__setActualLength__()
    # TODO: add a unit test for the False case.
    self.successfullyParsed = True
def __init__(self, aStr):
    """Parse one sequence record out of ``aStr`` and populate this instance.

    ``aStr`` is matched against ``SEQUENCE_PATTERN``. From the match:

    * for ``seqIdNo``, ``length``, ``molType`` and ``organism`` both the
      ``<name>_raw`` group (stored unchanged) and the ``<name>`` group
      (stored after ``safeStrip``) are kept;
    * ``residues_raw`` is stored unchanged and ``seqNo400`` after
      ``safeStrip``;
    * every ``FEATURE_PATTERN`` match found in the ``features_raw`` group
      is wrapped in a ``Feature`` and appended to ``self.features``;
    * the ``residues`` group is split line by line: lines matching
      ``nucPattern`` go to ``residues_nuc``, all other lines go to
      ``residues_prt``; whitespace, commas and digits are then removed
      from both strings.

    ``mixedMode`` is True when both residue strings are non-empty and
    ``isSkipCode`` is True when both are empty. In mixed mode the protein
    residues are handed out, in feature order, to the ``CDS`` features.

    No exception is raised when ``aStr`` does not match: a message is
    printed, ``successfullyParsed`` stays False and none of the
    match-derived attributes (``seqIdNo``, ``length``, ...) are set, so
    callers should check ``successfullyParsed`` before reading them.
    """
    # Defaults; they stay in place for anything the parse does not fill in.
    self.successfullyParsed = False
    self.features = []
    self.residues_nuc = '-'
    self.residues_prt = '-'
    self.translations = []
    self.actualSeqIdNo = 0
    self.actualMolType = '-'
    self.actualLength = 0
    self.mixedMode = False
    self.isSkipCode = False

    sm = SEQUENCE_PATTERN.match(aStr)
    if not sm:
        # A single pre-formatted argument keeps this line valid, with the
        # same output, as a Python 2 print statement and a Python 3 call.
        print('Sequence: No match for sequence pattern for input: %s'
              % (aStr,))
        return

    # Header fields: raw group text plus its stripped counterpart.
    self.seqIdNo_raw = sm.group('seqIdNo_raw')
    self.seqIdNo = safeStrip(sm.group('seqIdNo'))
    self.length_raw = sm.group('length_raw')
    self.length = safeStrip(sm.group('length'))
    self.molType_raw = sm.group('molType_raw')
    self.molType = safeStrip(sm.group('molType'))
    self.organism_raw = sm.group('organism_raw')
    self.organism = safeStrip(sm.group('organism'))

    # One Feature per FEATURE_PATTERN match in the raw feature text.
    featuresString = sm.group('features_raw')
    if featuresString:
        for fm in FEATURE_PATTERN.finditer(featuresString):
            self.features.append(Feature(fm))

    self.residues_raw = sm.group('residues_raw')
    self.seqNo400 = safeStrip(sm.group('seqNo400'))

    # Sort each residue line into the nucleotide or the protein bucket.
    nucList = []
    prtList = []
    for line in sm.group('residues').splitlines():
        # TODO: add more robust code -- every line that is not recognised
        # as nucleotide is currently assumed to be protein.
        if nucPattern.match(line):
            nucList.append(line)
        else:
            prtList.append(line)

    # Drop whitespace, commas and digits, leaving only residue symbols.
    self.residues_nuc = re.sub(r'[\s,\d]', '', ''.join(nucList))
    self.residues_prt = re.sub(r'[\s,\d]', '', ''.join(prtList))

    # TODO: test it
    self.mixedMode = bool(self.residues_nuc and self.residues_prt)
    self.isSkipCode = not self.residues_nuc and not self.residues_prt

    if self.mixedMode:
        # Consecutive slices of residues_prt are assigned to the CDS
        # features; each slice is (range end - range start + 1) characters
        # long, the range being what su.getRangeFromLocation returns.
        currentStart = 0
        for f in self.features:
            if f.key != 'CDS':
                continue
            t = su.getRangeFromLocation(f.location)
            currentEnd = currentStart + (t[1] - t[0]) + 1
            currentTranslation = self.residues_prt[currentStart:currentEnd]
            currentStart = currentEnd
            self.translations.append(currentTranslation)
            f.translation = currentTranslation

    self.__setActualMolType__()
    self.__setActualLength__()
    self.successfullyParsed = True
def __init__(self, aStr):
    """Parse one sequence record out of ``aStr`` and populate this instance.

    ``aStr`` is matched against ``SEQUENCE_PATTERN``. From the match:

    * for ``seqIdNo``, ``length``, ``molType`` and ``organism`` both the
      ``<name>_raw`` group (stored unchanged) and the ``<name>`` group
      (stored after ``safeStrip``) are kept;
    * ``residues_raw`` is stored unchanged and ``seqNo400`` after
      ``safeStrip``;
    * every ``FEATURE_PATTERN`` match found in the ``features_raw`` group
      is wrapped in a ``Feature`` and appended to ``self.features``;
    * the ``residues`` group is split line by line: lines matching
      ``nucPattern`` go to ``residues_nuc``, all other lines go to
      ``residues_prt``; whitespace, commas and digits are then removed
      from both strings.

    ``mixedMode`` is True when both residue strings are non-empty and
    ``isSkipCode`` is True when both are empty. In mixed mode the protein
    residues are handed out, in feature order, to the ``CDS`` features.

    No exception is raised when ``aStr`` does not match: a message is
    printed, ``successfullyParsed`` stays False and none of the
    match-derived attributes (``seqIdNo``, ``length``, ...) are set, so
    callers should check ``successfullyParsed`` before reading them.
    """
    # Defaults; they stay in place for anything the parse does not fill in.
    self.successfullyParsed = False
    self.features = []
    self.residues_nuc = '-'
    self.residues_prt = '-'
    self.translations = []
    self.actualSeqIdNo = 0
    self.actualMolType = '-'
    self.actualLength = 0
    self.mixedMode = False
    self.isSkipCode = False

    sm = SEQUENCE_PATTERN.match(aStr)
    if not sm:
        # A single pre-formatted argument keeps this line valid, with the
        # same output, as a Python 2 print statement and a Python 3 call.
        print('Sequence: No match for sequence pattern for input: %s'
              % (aStr,))
        return

    # Header fields: raw group text plus its stripped counterpart.
    self.seqIdNo_raw = sm.group('seqIdNo_raw')
    self.seqIdNo = safeStrip(sm.group('seqIdNo'))
    self.length_raw = sm.group('length_raw')
    self.length = safeStrip(sm.group('length'))
    self.molType_raw = sm.group('molType_raw')
    self.molType = safeStrip(sm.group('molType'))
    self.organism_raw = sm.group('organism_raw')
    self.organism = safeStrip(sm.group('organism'))

    # One Feature per FEATURE_PATTERN match in the raw feature text.
    featuresString = sm.group('features_raw')
    if featuresString:
        for fm in FEATURE_PATTERN.finditer(featuresString):
            self.features.append(Feature(fm))

    self.residues_raw = sm.group('residues_raw')
    self.seqNo400 = safeStrip(sm.group('seqNo400'))

    # Sort each residue line into the nucleotide or the protein bucket.
    nucList = []
    prtList = []
    for line in sm.group('residues').splitlines():
        # TODO: add more robust code -- every line that is not recognised
        # as nucleotide is currently assumed to be protein.
        if nucPattern.match(line):
            nucList.append(line)
        else:
            prtList.append(line)

    # Drop whitespace, commas and digits, leaving only residue symbols.
    self.residues_nuc = re.sub(r'[\s,\d]', '', ''.join(nucList))
    self.residues_prt = re.sub(r'[\s,\d]', '', ''.join(prtList))

    # TODO: test it
    self.mixedMode = bool(self.residues_nuc and self.residues_prt)
    self.isSkipCode = not self.residues_nuc and not self.residues_prt

    if self.mixedMode:
        # Consecutive slices of residues_prt are assigned to the CDS
        # features; each slice is (range end - range start + 1) characters
        # long, the range being what su.getRangeFromLocation returns.
        currentStart = 0
        for f in self.features:
            if f.key != 'CDS':
                continue
            t = su.getRangeFromLocation(f.location)
            currentEnd = currentStart + (t[1] - t[0]) + 1
            currentTranslation = self.residues_prt[currentStart:currentEnd]
            currentStart = currentEnd
            self.translations.append(currentTranslation)
            f.translation = currentTranslation

    self.__setActualMolType__()
    self.__setActualLength__()
    self.successfullyParsed = True