def __init__(self, seq):
    """Estimate the size of the ST.26 representation of one sequence.

    Every estimate is the length of the element's text value (when the
    element has one) plus the matching entry of ``cu.TAG_LENGTH_ST26``.
    The individual estimates are kept as attributes and their total is
    stored in ``sequenceEstimatedSize``.

    :param seq: parsed sequence; this method reads its ``seqIdNo``,
        ``length``, ``molType``, ``residues_nuc``, ``residues_prt`` and
        ``features`` attributes.
    """
    tagLength = cu.TAG_LENGTH_ST26

    self.seq = seq
    self.featureSizeEstimators = []
    self.sequenceEstimatedSize = 0

    # SequenceData and INSDSeq contribute their tags only; sequenceIDNumber
    # also contributes the length of the id text.
    self.sequenceDataLength = (tagLength['SequenceData']
                               + len(seq.seqIdNo)
                               + tagLength['sequenceIDNumber']
                               + tagLength['INSDSeq'])
    self.INSDSeq_lengthLength = len(seq.length) + tagLength['INSDSeq_length']

    # 'AA' is measured instead of 'PRT' for protein sequences.
    if seq.molType == 'PRT':
        moltypeText = 'AA'
    else:
        moltypeText = seq.molType
    self.INSDSeq_moltypeLength = len(moltypeText) + tagLength['INSDSeq_moltype']
    self.INSDSeq_divisionLength = len("PAT") + tagLength['INSDSeq_division']
    self.INSDSeq_feature_tableLength = tagLength['INSDSeq_feature-table']

    sourceEstimator = self.getSourceFeatureSizeEstimator(seq)
    self.featureSizeEstimators.append(sourceEstimator)

    # Protein residues are measured after conversion by cu.oneLetterCode.
    residues = seq.residues_nuc
    if seq.molType == 'PRT':
        residues = cu.oneLetterCode(seq.residues_prt)
    self.INSDSeq_sequenceLength = len(residues) + tagLength['INSDSeq_sequence']

    for feature in self.seq.features:
        hasKeyOrLocation = (feature.key != cu.BLANK_PLACEHOLDER
                            or feature.location != cu.BLANK_PLACEHOLDER)
        if hasKeyOrLocation:
            self.featureSizeEstimators.append(
                self.getOtherFeatureSizeEstimator(feature))
            continue
        # Blank key and blank location: the description is counted as an
        # extra 'note' qualifier of the source feature.
        sourceEstimator['qualifiers'].append({
            'INSDQualifierLength': tagLength['INSDQualifier'],
            'INSDQualifier_nameLength':
                len('note') + tagLength['INSDQualifier_name'],
            'INSDQualifier_valueLength':
                len(feature.description) + tagLength['INSDQualifier_value'],
        })

    featuresEstimatedSize = 0
    for estimator in self.featureSizeEstimators:
        for qualifier in estimator['qualifiers']:
            featuresEstimatedSize += (qualifier['INSDQualifierLength']
                                      + qualifier['INSDQualifier_nameLength']
                                      + qualifier['INSDQualifier_valueLength'])
        featuresEstimatedSize += (estimator['INSDFeatureLength']
                                  + estimator['INSDFeature_keyLength']
                                  + estimator['INSDFeature_locationLength']
                                  + estimator['INSDFeature_qualsLength'])

    self.sequenceEstimatedSize = (self.sequenceDataLength
                                  + self.INSDSeq_lengthLength
                                  + self.INSDSeq_moltypeLength
                                  + self.INSDSeq_divisionLength
                                  + self.INSDSeq_feature_tableLength
                                  + featuresEstimatedSize
                                  + self.INSDSeq_sequenceLength)
def setSequencesSt26(self):
    """Create and save the ST.26 records for every ST.25 sequence.

    For each sequence yielded by ``self.seql_st25.generateSequence()`` this
    saves one ``Sequence``, a source ``Feature`` located at ``1..<length>``
    with an organism ``Qualifier``, and one ``Feature`` plus one note
    ``Qualifier`` per ST.25 feature.

    'DNA' and 'RNA' sequences keep their molecule type and use lower-case
    key/qualifier names; every other molecule type is stored as 'AA' with
    upper-case names and residues converted by
    ``converter_util.oneLetterCode``.
    """
    nucleotideNames = ('source', 'organism', 'note')
    proteinNames = ('SOURCE', 'ORGANISM', 'NOTE')

    for s25 in self.seql_st25.generateSequence():
        if s25.molType in ('DNA', 'RNA'):
            molType_st26 = s25.molType
            sourceKey, organismName, noteName = nucleotideNames
            residues_st26 = s25.residues_nuc
        else:
            molType_st26 = 'AA'
            sourceKey, organismName, noteName = proteinNames
            residues_st26 = converter_util.oneLetterCode(s25.residues_prt)

        # otherSeqId is optional, so it is not set on converted sequences.
        s26 = Sequence(sequenceListing=self.seql_st26,
                       sequenceIdNo=s25.seqIdNo,
                       length=s25.length,
                       moltype=molType_st26,
                       division='PAT',
                       residues=residues_st26)
        s26.save()

        sourceFeature = Feature(sequence=s26,
                                featureKey=sourceKey,
                                location='1..%s' % s26.length)
        sourceFeature.save()
        Qualifier(feature=sourceFeature,
                  qualifierName=organismName,
                  qualifierValue=s25.organism).save()

        for st25Feature in s25.features:
            convertedFeature = Feature(sequence=s26,
                                       featureKey=st25Feature.key,
                                       location=st25Feature.location)
            convertedFeature.save()
            Qualifier(feature=convertedFeature,
                      qualifierName=noteName,
                      qualifierValue=st25Feature.description).save()
def getOtherFeatureSizeEstimator(self, feat):
    """Return the size-estimate dictionary for a non-source feature.

    A 'note' qualifier is counted when ``feat.description`` is not the blank
    placeholder, and a 'translation' qualifier (value measured after
    ``cu.oneLetterCode``) is counted when ``feat.key`` is 'CDS'.  A CDS
    feature that also has a description gets BOTH qualifiers; previously the
    translation entry replaced the note entry, so the note was left out of
    the estimate.

    :param feat: parsed feature; reads ``key``, ``location``,
        ``description`` and, for CDS features, ``translation``.
    :return: dict with the keys ``INSDFeatureLength``,
        ``INSDFeature_keyLength``, ``INSDFeature_locationLength``,
        ``INSDFeature_qualsLength`` and ``qualifiers`` (a list of
        per-qualifier length dicts).
    """
    tagLength = cu.TAG_LENGTH_ST26

    def qualifierEstimator(name, value):
        # One INSDQualifier element with its name and value children.
        return {
            'INSDQualifierLength': tagLength['INSDQualifier'],
            'INSDQualifier_nameLength':
                len(name) + tagLength['INSDQualifier_name'],
            'INSDQualifier_valueLength':
                len(value) + tagLength['INSDQualifier_value'],
        }

    quals = []
    if feat.description != cu.BLANK_PLACEHOLDER:
        quals.append(qualifierEstimator('note', feat.description))
    if feat.key == 'CDS':
        quals.append(qualifierEstimator('translation',
                                        cu.oneLetterCode(feat.translation)))

    # The INSDFeature_quals wrapper is counted once, and only when the
    # feature has at least one qualifier.
    qualsLength = tagLength['INSDFeature_quals'] if quals else 0

    return {
        'INSDFeatureLength': tagLength['INSDFeature'],
        'INSDFeature_keyLength':
            len(feat.key) + tagLength['INSDFeature_key'],
        'INSDFeature_locationLength':
            len(feat.location) + tagLength['INSDFeature_location'],
        'INSDFeature_qualsLength': qualsLength,
        'qualifiers': quals,
    }
def setSequencesSt26(self):
    """Create and save the ST.26 records for every ST.25 sequence.

    For each sequence yielded by ``self.seql_st25.generateSequence()`` this
    saves:

    * one ``Sequence`` (division 'PAT');
    * a source ``Feature`` located at ``1..<length>`` carrying an organism
      qualifier and a mol_type qualifier;
    * for each ST.25 feature whose key and location are both
      ``seqlutils.DEFAULT_STRING``: a note qualifier on the source feature;
    * for every other ST.25 feature: a ``Feature``, a note qualifier when
      its description is not ``seqlutils.DEFAULT_STRING``, and a
      'translation' qualifier when its key is 'CDS'.

    'DNA' and 'RNA' sequences keep their molecule type and use lower-case
    names; every other molecule type is stored as 'AA' with upper-case names
    and residues converted by ``converter_util.oneLetterCode``.

    Sets ``self.successful`` to True after all sequences were processed.
    """
    for s25 in self.seql_st25.generateSequence():
        # Progress trace.  The single-argument form prints the same text as
        # the former print statement and is also valid on Python 3.
        print('seq %s' % (s25.seqIdNo,))

        if s25.molType in ('DNA', 'RNA'):
            molType_st26 = s25.molType
            sourceKey = 'source'
            organismQualifierName = 'organism'
            mol_typeQualifierName = 'mol_type'
            mol_typeQualifierValue = 'genomic %s' % s25.molType
            noteQualifierName = 'note'
            residues_st26 = s25.residues_nuc
        else:
            molType_st26 = 'AA'
            sourceKey = 'SOURCE'
            organismQualifierName = 'ORGANISM'
            mol_typeQualifierName = 'MOL_TYPE'
            mol_typeQualifierValue = 'protein'
            noteQualifierName = 'NOTE'
            residues_st26 = converter_util.oneLetterCode(s25.residues_prt)

        # otherSeqId is optional, so it is not set on converted sequences.
        s26 = Sequence(sequenceListing=self.seql_st26,
                       sequenceIdNo=s25.seqIdNo,
                       length=s25.length,
                       moltype=molType_st26,
                       division='PAT',
                       residues=residues_st26)
        s26.save()

        sourceFeature = Feature(sequence=s26,
                                featureKey=sourceKey,
                                location='1..%s' % s26.length)
        sourceFeature.save()

        organismQualifier = Qualifier(feature=sourceFeature,
                                      qualifierName=organismQualifierName,
                                      qualifierValue=s25.organism)
        organismQualifier.save()

        mol_typeQualifier = Qualifier(feature=sourceFeature,
                                      qualifierName=mol_typeQualifierName,
                                      qualifierValue=mol_typeQualifierValue)
        mol_typeQualifier.save()

        for f in s25.features:
            if (f.key == seqlutils.DEFAULT_STRING
                    and f.location == seqlutils.DEFAULT_STRING):
                # No key and no location: keep the description as a note
                # on the source feature instead of creating a feature.
                sourceNoteQualifier = Qualifier(
                    feature=sourceFeature,
                    qualifierName=noteQualifierName,
                    qualifierValue=f.description)
                sourceNoteQualifier.save()
            else:
                currentFeature = Feature(sequence=s26,
                                         featureKey=f.key,
                                         location=f.location)
                currentFeature.save()

                # A missing description must not become an empty note.
                if f.description != seqlutils.DEFAULT_STRING:
                    currentQualifier = Qualifier(
                        feature=currentFeature,
                        qualifierName=noteQualifierName,
                        qualifierValue=f.description)
                    currentQualifier.save()

                if f.key == 'CDS':
                    translationQualifierValue = converter_util.oneLetterCode(
                        f.translation)
                    translationQualifier = Qualifier(
                        feature=currentFeature,
                        qualifierName='translation',
                        qualifierValue=translationQualifierValue)
                    translationQualifier.save()

    self.successful = True
def setSequenceRows(self):
    """Build the ST.25 / ST.26 length rows for every sequence.

    First iterates ``self.seql.generateSequence()`` to collect the parsed
    sequences and to update the ``quantity_prt`` / ``quantity_nuc`` /
    ``quantity_mix`` counters on ``self.seql``.  Then each raw sequence of
    ``self.seql_raw.raw_sequences`` is paired BY POSITION with its parsed
    counterpart and one row is appended per ST.26 element.

    Rows built inline are 8-item lists:
    ``[ST.25 tag (0 when there is none), sequence id,
    length of the raw value, length of the parsed value,
    ST.26 tag length, ST.26 tag length + value length,
    ST.26 element name, comment]``.
    The other rows come from ``self._getSt25St26Lengths``.

    Positions are taken from ``enumerate`` rather than ``list.index``:
    ``index`` rescans the list on every iteration and returns the first
    element that compares equal, which pairs the wrong parsed item when two
    raw items compare equal.

    :return: list of rows in the order they were generated.
    :raises IndexError: if there are fewer parsed sequences (or parsed
        features) than raw ones.
    """
    tagLen = cu.TAG_LENGTH_ST26

    res = []
    parsedSequences = []
    for s in self.seql.generateSequence():
        parsedSequences.append(s)
        # TODO: test
        if s.molType == 'PRT':
            self.seql.quantity_prt += 1
        else:
            self.seql.quantity_nuc += 1
        # NOTE(review): the nesting of this check could not be recovered
        # from the flattened source; it is applied to every sequence here.
        if s.mixedMode:
            self.seql.quantity_mix += 1

    for currentIndex, seq in enumerate(self.seql_raw.raw_sequences):
        parsedSequence = parsedSequences[currentIndex]
        currentSeqId = parsedSequence.seqIdNo

        # ====================== 210 ======================
        currentRow_SequenceData = self._getSt25St26Lengths(
            0, currentSeqId, '-', '-',
            'SequenceData', 'ST.26 specific element')
        res.append(currentRow_SequenceData)

        currentRow210 = self._getSt25St26Lengths(
            210, currentSeqId, seq.seqIdNo, parsedSequence.seqIdNo,
            'sequenceIDNumber', '-')
        res.append(currentRow210)

        currentRow_INSDSeq = self._getSt25St26Lengths(
            0, currentSeqId, '-', '-',
            'INSDSeq', 'ST.26 specific element')
        res.append(currentRow_INSDSeq)

        # ====================== 211 ======================
        currentRow211 = self._getSt25St26Lengths(
            211, currentSeqId, seq.length, parsedSequence.length,
            'INSDSeq_length', cu.BLANK_PLACEHOLDER)
        res.append(currentRow211)

        # ====================== 212 ======================
        moltypeValue = ('AA' if parsedSequence.molType == 'PRT'
                        else parsedSequence.molType)
        currentRow212 = [
            212,
            currentSeqId,
            cu.safeLength(seq.molType),
            cu.safeLength(parsedSequence.molType),
            tagLen['INSDSeq_moltype'],
            cu.safeLength(moltypeValue) + tagLen['INSDSeq_moltype'],
            'INSDSeq_moltype',
            ('PRT replaced by AA for protein raw_sequences'
             if moltypeValue == 'AA' else cu.BLANK_PLACEHOLDER),
        ]
        res.append(currentRow212)

        # ====================== INSDSeq_division ======================
        INSDSeq_division_val = 'PAT'
        currentRow_INSDSeq_division = self._getSt25St26Lengths(
            0, currentSeqId, '-', INSDSeq_division_val,
            'INSDSeq_division', 'ST.26 specific element')
        res.append(currentRow_INSDSeq_division)

        # ====================== INSDSeq_other-seqids ======================
        # optional element, therefore not included in calculations

        # ====================== INSDSeq_feature-table ======================
        currentRow_INSDSeq_feature_table = self._getSt25St26Lengths(
            0, currentSeqId, '-', '-',
            'INSDSeq_feature-table', 'ST.26 specific element')
        res.append(currentRow_INSDSeq_feature_table)

        # ====================== 213 ======================
        # create ST.26 feature source
        currentRow_INSDFeature = [
            0, currentSeqId, 0, 0,
            tagLen['INSDFeature'],
            tagLen['INSDFeature'],
            'INSDFeature',
            'ST.26 mandatory feature source',
        ]
        res.append(currentRow_INSDFeature)

        currentRow_INSDFeature_key = [
            0, currentSeqId, 0, 0,
            tagLen['INSDFeature_key'],
            len('source') + tagLen['INSDFeature_key'],
            'INSDFeature_key',
            'ST.26 mandatory feature source',
        ]
        res.append(currentRow_INSDFeature_key)

        sourceLocation = '1..%s' % parsedSequence.length
        currentRow_INSDFeature_location = [
            0, currentSeqId, 0, 0,
            tagLen['INSDFeature_location'],
            len(sourceLocation) + tagLen['INSDFeature_location'],
            'INSDFeature_location',
            'ST.26 mandatory feature source',
        ]
        res.append(currentRow_INSDFeature_location)

        # The three helpers below append to ``res`` and read the
        # ``currentSeqId`` of the sequence being processed.
        def append_INSDFeature_quals(msg):
            res.append([
                0, currentSeqId, 0, 0,
                tagLen['INSDFeature_quals'],
                tagLen['INSDFeature_quals'],
                'INSDFeature_quals',
                msg,
            ])

        # add first the parent element INSDFeature_quals
        append_INSDFeature_quals('ST.26 mandatory feature source')

        def createQualifier(name, msg):
            currentRow_INSDQualifier = [
                0, currentSeqId, 0, 0,
                tagLen['INSDQualifier'],
                tagLen['INSDQualifier'],
                'INSDQualifier',
                msg,
            ]
            res.append(currentRow_INSDQualifier)

            currentRow_INSDQualifier_name = [
                0, currentSeqId, 0, 0,
                tagLen['INSDQualifier_name'],
                len(name) + tagLen['INSDQualifier_name'],
                'INSDQualifier_name',
                msg,
            ]
            res.append(currentRow_INSDQualifier_name)

        def createQualifierValue(tag_st25, element_st25, value_st25, msg):
            currentRow_INSDQualifier_value = [
                tag_st25,
                currentSeqId,
                cu.safeLength(element_st25),
                cu.safeLength(value_st25),
                tagLen['INSDQualifier_value'],
                cu.safeLength(value_st25) + tagLen['INSDQualifier_value'],
                'INSDQualifier_value',
                msg,
            ]
            res.append(currentRow_INSDQualifier_value)

        # qualifier organism
        createQualifier('organism', 'ST.26 mandatory qualifier organism')
        createQualifierValue(213, seq.organism, parsedSequence.organism,
                             'ST.26 mandatory qualifier organism')

        # qualifier mol_type
        mol_typeValue = ('protein' if parsedSequence.molType == 'PRT'
                         else 'genomic DNA')
        createQualifier('mol_type', 'ST.26 mandatory qualifier mol_type')
        # The value row is built inline because there is no ST.25 element
        # for mol_type, so both ST.25 length columns are 0.
        res.append([
            0, currentSeqId, 0, 0,
            tagLen['INSDQualifier_value'],
            cu.safeLength(mol_typeValue) + tagLen['INSDQualifier_value'],
            'INSDQualifier_value',
            'ST.26 mandatory qualifier mol_type',
        ])
        # end create ST.26 feature source

        # ====================== other features ======================
        parsedFeatures = parsedSequence.features
        for currentFeatureIndex, feat in enumerate(seq.features):
            parsedFeature = parsedFeatures[currentFeatureIndex]

            isSimpleFeature = False
            if (parsedFeature.key == cu.BLANK_PLACEHOLDER
                    and parsedFeature.location == cu.BLANK_PLACEHOLDER):
                isSimpleFeature = True

            if not isSimpleFeature:
                # ====================== 220 ======================
                currentRow220 = self._getSt25St26Lengths(
                    220, currentSeqId, feat.featureHeader,
                    parsedFeature.featureHeader,
                    'INSDFeature', cu.BLANK_PLACEHOLDER)
                res.append(currentRow220)

                # ====================== 221 ======================
                currentRow221 = self._getSt25St26Lengths(
                    221, currentSeqId, feat.key, parsedFeature.key,
                    'INSDFeature_key', cu.BLANK_PLACEHOLDER)
                res.append(currentRow221)

                # ===== add row for mixed mode translation qualifier =====
                if parsedFeature.key == 'CDS':
                    createQualifier('translation',
                                    'ST.26 specific element translation')
                    translationRow = [
                        400,
                        currentSeqId,
                        0,
                        cu.safeLength(parsedFeature.translation),
                        tagLen['INSDQualifier_value'],
                        (tagLen['INSDQualifier_value']
                         + len(cu.oneLetterCode(parsedFeature.translation))),
                        'INSDQualifier_value',
                        '3-to-1 letter code',
                    ]
                    res.append(translationRow)

                # ====================== 222 ======================
                currentRow222 = self._getSt25St26Lengths(
                    222, currentSeqId, feat.location,
                    parsedFeature.location,
                    'INSDFeature_location', cu.BLANK_PLACEHOLDER)
                res.append(currentRow222)

            # ====================== 223 ======================
            # NOTE(review): the nesting of this section could not be
            # recovered from the flattened source; it is applied to simple
            # features too, so that their description is still counted.
            if parsedFeature.description != cu.BLANK_PLACEHOLDER:
                # do not add row if 223 missing!
                append_INSDFeature_quals('ST.26 mandatory element')
                createQualifier('note', cu.BLANK_PLACEHOLDER)
                createQualifierValue(223, feat.description,
                                     parsedFeature.description,
                                     cu.BLANK_PLACEHOLDER)

        # ====================== 400 ======================
        if parsedSequence.molType == 'PRT':
            parsedResidues = parsedSequence.residues_prt
            currentRow400 = [
                400,
                currentSeqId,
                cu.safeLength(seq.residues),
                cu.safeLength(parsedResidues),
                tagLen['INSDSeq_sequence'],
                (tagLen['INSDSeq_sequence']
                 + len(cu.oneLetterCode(parsedResidues))),
                'INSDSeq_sequence',
                '3-to-1 letter code',
            ]
        else:
            parsedResidues = parsedSequence.residues_nuc
            currentRow400 = self._getSt25St26Lengths(
                400, currentSeqId, seq.residues, parsedResidues,
                'INSDSeq_sequence', cu.BLANK_PLACEHOLDER)
        res.append(currentRow400)

    return res
def setSequencesSt26(self):
    """Convert every ST.25 sequence into saved ST.26 model instances.

    Per sequence from ``self.seql_st25.generateSequence()``:

    * a ``Sequence`` with division 'PAT' is saved;
    * a source ``Feature`` (location ``1..<length>``) is saved together
      with its organism and mol_type qualifiers;
    * an ST.25 feature whose key and location both equal
      ``seqlutils.DEFAULT_STRING`` becomes a note qualifier of the source
      feature;
    * any other ST.25 feature becomes a ``Feature`` with a note qualifier
      (only if its description is not ``seqlutils.DEFAULT_STRING``) and a
      'translation' qualifier (only if its key is 'CDS').

    Molecule types 'DNA'/'RNA' are kept and use lower-case names; all other
    molecule types are written as 'AA' with upper-case names and residues
    converted by ``converter_util.oneLetterCode``.

    ``self.successful`` is set to True once the loop has finished.
    """
    for s25 in self.seql_st25.generateSequence():
        # Progress trace, written as a single-argument call so that the
        # output is unchanged on Python 2 and the code parses on Python 3.
        print('seq %s' % (s25.seqIdNo,))

        if s25.molType in ('DNA', 'RNA'):
            molType_st26 = s25.molType
            sourceKey = 'source'
            organismQualifierName = 'organism'
            mol_typeQualifierName = 'mol_type'
            mol_typeQualifierValue = 'genomic %s' % s25.molType
            noteQualifierName = 'note'
            residues_st26 = s25.residues_nuc
        else:
            molType_st26 = 'AA'
            sourceKey = 'SOURCE'
            organismQualifierName = 'ORGANISM'
            mol_typeQualifierName = 'MOL_TYPE'
            mol_typeQualifierValue = 'protein'
            noteQualifierName = 'NOTE'
            residues_st26 = converter_util.oneLetterCode(s25.residues_prt)

        # otherSeqId is optional, so it is not set on converted sequences.
        s26 = Sequence(
            sequenceListing=self.seql_st26,
            sequenceIdNo=s25.seqIdNo,
            length=s25.length,
            moltype=molType_st26,
            division='PAT',
            residues=residues_st26)
        s26.save()

        sourceFeature = Feature(
            sequence=s26,
            featureKey=sourceKey,
            location='1..%s' % s26.length)
        sourceFeature.save()

        organismQualifier = Qualifier(
            feature=sourceFeature,
            qualifierName=organismQualifierName,
            qualifierValue=s25.organism)
        organismQualifier.save()

        mol_typeQualifier = Qualifier(
            feature=sourceFeature,
            qualifierName=mol_typeQualifierName,
            qualifierValue=mol_typeQualifierValue)
        mol_typeQualifier.save()

        for f in s25.features:
            if (f.key == seqlutils.DEFAULT_STRING
                    and f.location == seqlutils.DEFAULT_STRING):
                # Feature without key and location: its description is
                # attached to the source feature as a note.
                sourceNoteQualifier = Qualifier(
                    feature=sourceFeature,
                    qualifierName=noteQualifierName,
                    qualifierValue=f.description)
                sourceNoteQualifier.save()
            else:
                currentFeature = Feature(
                    sequence=s26,
                    featureKey=f.key,
                    location=f.location)
                currentFeature.save()

                # Skip the note when the description is missing, so no
                # empty qualifier is written.
                if f.description != seqlutils.DEFAULT_STRING:
                    currentQualifier = Qualifier(
                        feature=currentFeature,
                        qualifierName=noteQualifierName,
                        qualifierValue=f.description)
                    currentQualifier.save()

                if f.key == 'CDS':
                    translationQualifierValue = converter_util.oneLetterCode(
                        f.translation)
                    translationQualifier = Qualifier(
                        feature=currentFeature,
                        qualifierName='translation',
                        qualifierValue=translationQualifierValue)
                    translationQualifier.save()

    self.successful = True
def setSequenceRows(self):
    """Build the ST.25 / ST.26 length rows for every sequence.

    First iterates ``self.seql.generateSequence()`` to collect the parsed
    sequences and to update the ``quantity_prt`` / ``quantity_nuc`` /
    ``quantity_mix`` counters on ``self.seql``.  Then each raw sequence of
    ``self.seql_raw.raw_sequences`` is paired BY POSITION with its parsed
    counterpart and one row is appended per ST.26 element.

    Rows built inline are 8-item lists:
    ``[ST.25 tag (0 when there is none), sequence id,
    length of the raw value, length of the parsed value,
    ST.26 tag length, ST.26 tag length + value length,
    ST.26 element name, comment]``.
    The other rows come from ``self._getSt25St26Lengths``.

    Positions are taken from ``enumerate`` rather than ``list.index``:
    ``index`` rescans the list on every iteration and returns the first
    element that compares equal, which pairs the wrong parsed item when two
    raw items compare equal.

    :return: list of rows in the order they were generated.
    :raises IndexError: if there are fewer parsed sequences (or parsed
        features) than raw ones.
    """
    tagLen = cu.TAG_LENGTH_ST26

    res = []
    parsedSequences = []
    for s in self.seql.generateSequence():
        parsedSequences.append(s)
        # TODO: test
        if s.molType == 'PRT':
            self.seql.quantity_prt += 1
        else:
            self.seql.quantity_nuc += 1
        # NOTE(review): the nesting of this check could not be recovered
        # from the flattened source; it is applied to every sequence here.
        if s.mixedMode:
            self.seql.quantity_mix += 1

    for currentIndex, seq in enumerate(self.seql_raw.raw_sequences):
        parsedSequence = parsedSequences[currentIndex]
        currentSeqId = parsedSequence.seqIdNo

        # ====================== 210 ======================
        currentRow_SequenceData = self._getSt25St26Lengths(
            0, currentSeqId, '-', '-',
            'SequenceData', 'ST.26 specific element')
        res.append(currentRow_SequenceData)

        currentRow210 = self._getSt25St26Lengths(
            210, currentSeqId, seq.seqIdNo, parsedSequence.seqIdNo,
            'sequenceIDNumber', '-')
        res.append(currentRow210)

        currentRow_INSDSeq = self._getSt25St26Lengths(
            0, currentSeqId, '-', '-',
            'INSDSeq', 'ST.26 specific element')
        res.append(currentRow_INSDSeq)

        # ====================== 211 ======================
        currentRow211 = self._getSt25St26Lengths(
            211, currentSeqId, seq.length, parsedSequence.length,
            'INSDSeq_length', cu.BLANK_PLACEHOLDER)
        res.append(currentRow211)

        # ====================== 212 ======================
        moltypeValue = ('AA' if parsedSequence.molType == 'PRT'
                        else parsedSequence.molType)
        currentRow212 = [
            212,
            currentSeqId,
            cu.safeLength(seq.molType),
            cu.safeLength(parsedSequence.molType),
            tagLen['INSDSeq_moltype'],
            cu.safeLength(moltypeValue) + tagLen['INSDSeq_moltype'],
            'INSDSeq_moltype',
            ('PRT replaced by AA for protein raw_sequences'
             if moltypeValue == 'AA' else cu.BLANK_PLACEHOLDER),
        ]
        res.append(currentRow212)

        # ====================== INSDSeq_division ======================
        INSDSeq_division_val = 'PAT'
        currentRow_INSDSeq_division = self._getSt25St26Lengths(
            0, currentSeqId, '-', INSDSeq_division_val,
            'INSDSeq_division', 'ST.26 specific element')
        res.append(currentRow_INSDSeq_division)

        # ====================== INSDSeq_other-seqids ======================
        # optional element, therefore not included in calculations

        # ====================== INSDSeq_feature-table ======================
        currentRow_INSDSeq_feature_table = self._getSt25St26Lengths(
            0, currentSeqId, '-', '-',
            'INSDSeq_feature-table', 'ST.26 specific element')
        res.append(currentRow_INSDSeq_feature_table)

        # ====================== 213 ======================
        # create ST.26 feature source
        currentRow_INSDFeature = [
            0, currentSeqId, 0, 0,
            tagLen['INSDFeature'],
            tagLen['INSDFeature'],
            'INSDFeature',
            'ST.26 mandatory feature source',
        ]
        res.append(currentRow_INSDFeature)

        currentRow_INSDFeature_key = [
            0, currentSeqId, 0, 0,
            tagLen['INSDFeature_key'],
            len('source') + tagLen['INSDFeature_key'],
            'INSDFeature_key',
            'ST.26 mandatory feature source',
        ]
        res.append(currentRow_INSDFeature_key)

        sourceLocation = '1..%s' % parsedSequence.length
        currentRow_INSDFeature_location = [
            0, currentSeqId, 0, 0,
            tagLen['INSDFeature_location'],
            len(sourceLocation) + tagLen['INSDFeature_location'],
            'INSDFeature_location',
            'ST.26 mandatory feature source',
        ]
        res.append(currentRow_INSDFeature_location)

        # The three helpers below append to ``res`` and read the
        # ``currentSeqId`` of the sequence being processed.
        def append_INSDFeature_quals(msg):
            res.append([
                0, currentSeqId, 0, 0,
                tagLen['INSDFeature_quals'],
                tagLen['INSDFeature_quals'],
                'INSDFeature_quals',
                msg,
            ])

        # add first the parent element INSDFeature_quals
        append_INSDFeature_quals('ST.26 mandatory feature source')

        def createQualifier(name, msg):
            currentRow_INSDQualifier = [
                0, currentSeqId, 0, 0,
                tagLen['INSDQualifier'],
                tagLen['INSDQualifier'],
                'INSDQualifier',
                msg,
            ]
            res.append(currentRow_INSDQualifier)

            currentRow_INSDQualifier_name = [
                0, currentSeqId, 0, 0,
                tagLen['INSDQualifier_name'],
                len(name) + tagLen['INSDQualifier_name'],
                'INSDQualifier_name',
                msg,
            ]
            res.append(currentRow_INSDQualifier_name)

        def createQualifierValue(tag_st25, element_st25, value_st25, msg):
            currentRow_INSDQualifier_value = [
                tag_st25,
                currentSeqId,
                cu.safeLength(element_st25),
                cu.safeLength(value_st25),
                tagLen['INSDQualifier_value'],
                cu.safeLength(value_st25) + tagLen['INSDQualifier_value'],
                'INSDQualifier_value',
                msg,
            ]
            res.append(currentRow_INSDQualifier_value)

        # qualifier organism
        createQualifier('organism', 'ST.26 mandatory qualifier organism')
        createQualifierValue(213, seq.organism, parsedSequence.organism,
                             'ST.26 mandatory qualifier organism')

        # qualifier mol_type
        mol_typeValue = ('protein' if parsedSequence.molType == 'PRT'
                         else 'genomic DNA')
        createQualifier('mol_type', 'ST.26 mandatory qualifier mol_type')
        # The value row is built inline because there is no ST.25 element
        # for mol_type, so both ST.25 length columns are 0.
        res.append([
            0, currentSeqId, 0, 0,
            tagLen['INSDQualifier_value'],
            cu.safeLength(mol_typeValue) + tagLen['INSDQualifier_value'],
            'INSDQualifier_value',
            'ST.26 mandatory qualifier mol_type',
        ])
        # end create ST.26 feature source

        # ====================== other features ======================
        parsedFeatures = parsedSequence.features
        for currentFeatureIndex, feat in enumerate(seq.features):
            parsedFeature = parsedFeatures[currentFeatureIndex]

            isSimpleFeature = False
            if (parsedFeature.key == cu.BLANK_PLACEHOLDER
                    and parsedFeature.location == cu.BLANK_PLACEHOLDER):
                isSimpleFeature = True

            if not isSimpleFeature:
                # ====================== 220 ======================
                currentRow220 = self._getSt25St26Lengths(
                    220, currentSeqId, feat.featureHeader,
                    parsedFeature.featureHeader,
                    'INSDFeature', cu.BLANK_PLACEHOLDER)
                res.append(currentRow220)

                # ====================== 221 ======================
                currentRow221 = self._getSt25St26Lengths(
                    221, currentSeqId, feat.key, parsedFeature.key,
                    'INSDFeature_key', cu.BLANK_PLACEHOLDER)
                res.append(currentRow221)

                # ===== add row for mixed mode translation qualifier =====
                if parsedFeature.key == 'CDS':
                    createQualifier('translation',
                                    'ST.26 specific element translation')
                    translationRow = [
                        400,
                        currentSeqId,
                        0,
                        cu.safeLength(parsedFeature.translation),
                        tagLen['INSDQualifier_value'],
                        (tagLen['INSDQualifier_value']
                         + len(cu.oneLetterCode(parsedFeature.translation))),
                        'INSDQualifier_value',
                        '3-to-1 letter code',
                    ]
                    res.append(translationRow)

                # ====================== 222 ======================
                currentRow222 = self._getSt25St26Lengths(
                    222, currentSeqId, feat.location,
                    parsedFeature.location,
                    'INSDFeature_location', cu.BLANK_PLACEHOLDER)
                res.append(currentRow222)

            # ====================== 223 ======================
            # NOTE(review): the nesting of this section could not be
            # recovered from the flattened source; it is applied to simple
            # features too, so that their description is still counted.
            if parsedFeature.description != cu.BLANK_PLACEHOLDER:
                # do not add row if 223 missing!
                append_INSDFeature_quals('ST.26 mandatory element')
                createQualifier('note', cu.BLANK_PLACEHOLDER)
                createQualifierValue(223, feat.description,
                                     parsedFeature.description,
                                     cu.BLANK_PLACEHOLDER)

        # ====================== 400 ======================
        if parsedSequence.molType == 'PRT':
            parsedResidues = parsedSequence.residues_prt
            currentRow400 = [
                400,
                currentSeqId,
                cu.safeLength(seq.residues),
                cu.safeLength(parsedResidues),
                tagLen['INSDSeq_sequence'],
                (tagLen['INSDSeq_sequence']
                 + len(cu.oneLetterCode(parsedResidues))),
                'INSDSeq_sequence',
                '3-to-1 letter code',
            ]
        else:
            parsedResidues = parsedSequence.residues_nuc
            currentRow400 = self._getSt25St26Lengths(
                400, currentSeqId, seq.residues, parsedResidues,
                'INSDSeq_sequence', cu.BLANK_PLACEHOLDER)
        res.append(currentRow400)

    return res
def test_setSequencesSt26(self):
    """Check the sequences, features and qualifiers stored by the converter.

    Uses the listings converted by the ``self.sc1`` and ``self.sc1004``
    fixtures: basic sequence data, absence of empty notes, translation
    qualifiers of CDS features (mixed mode) and the note created on the
    source feature for a feature without key and location.
    """
    def qualifiersOf(features):
        # Yield every qualifier of every feature, feature by feature.
        for feature in features:
            for qualifier in feature.qualifier_set.all():
                yield qualifier

    def firstQualifier(feature):
        return feature.qualifier_set.all()[0]

    sequences = self.sc1.seql_st26.sequence_set.all()
    self.assertEqual(4, sequences.count())

    seq2 = sequences.get(sequenceIdNo=2)
    seq4 = sequences.get(sequenceIdNo=4)
    self.assertEqual('DNA', seq2.moltype)
    self.assertEqual('AA', seq4.moltype)
    self.assertEqual('ttgaccaagctggggaccccggtcccttgggaccagtggcagaggagtc',
                     seq2.residues)

    seq2Features = seq2.feature_set.all()
    self.assertEqual("3'clip", seq2Features[1].featureKey)
    self.assertEqual("1..30", seq2Features[1].location)

    sequences_1004 = self.sc1004.seql_st26.sequence_set.all()
    seq1004_1 = sequences_1004.get(sequenceIdNo=1)
    seq1004_7 = sequences_1004.get(sequenceIdNo=7)
    self.assertEqual(903, seq1004_1.length)

    features_1004_1 = seq1004_1.feature_set.all()
    self.assertEqual("CDS", features_1004_1[1].featureKey)
    self.assertEqual("(1)..(903)", features_1004_1[1].location)

    # test that feature description missing is not converted to empty element
    seq1 = sequences.get(sequenceIdNo=1)
    for qualifier in qualifiersOf(seq1.feature_set.all()):
        self.assertFalse(qualifier.qualifierName in ['note', "NOTE"])

    for qualifier in qualifiersOf(seq4.feature_set.all()):
        if qualifier.qualifierName == 'NOTE':
            expectedNote = 'influenza virus A hemagglutinin subtype H9'
            self.assertEqual(expectedNote, qualifier.qualifierValue)

    # ============== tests for mixed mode ==================================
    translation_1004_1 = firstQualifier(features_1004_1[1])
    self.assertEqual("translation", translation_1004_1.qualifierName)
    expectedTranslation = converter_util.oneLetterCode(
        self.sc1004_seql_st25_sequences[0].residues_prt)
    self.assertEqual(expectedTranslation, translation_1004_1.qualifierValue)

    features_1004_7 = seq1004_7.feature_set.all()
    self.assertEqual("CDS", features_1004_7[1].featureKey)
    self.assertEqual("(1)..(84)", features_1004_7[1].location)

    translation7_first = firstQualifier(features_1004_7[1])
    self.assertEqual("translation", translation7_first.qualifierName)
    translation7_second = firstQualifier(features_1004_7[2])
    self.assertEqual("translation", translation7_second.qualifierName)

    expectedFirst = converter_util.oneLetterCode(
        'MetLysLysSerLeuValLeuLysAlaSerValAlaValAlaThrLeuValProMetLeuSerPheAlaAlaGluGlyGluPhe')
    expectedSecond = converter_util.oneLetterCode(
        'AspProAlaLysAlaAlaPheAspSerLeuGlnAlaSerAlaThrGluTyrIleGlyTyrAlaTrpAlaMetValValValIleValGlyAlaThrIleGlyIleLysLeuPheLysLysPheThrSerLysAlaSer')
    self.assertEqual(expectedFirst, translation7_first.qualifierValue)
    self.assertEqual(expectedSecond, translation7_second.qualifierValue)

    # ============== tests for simple feature conversion ===================
    source_1004_7 = features_1004_7[0]
    sourceNote = source_1004_7.qualifier_set.get(qualifierName='note')
    self.assertEqual('pc89 major coat protein PVIII',
                     sourceNote.qualifierValue)