def setRow_header(self):
    """Append the sequence listing header row to ``generalInformationRows``.

    The row holds the lengths of ``self.seql_raw.seqlHeader`` and of
    ``self.seql.generalInformation.seqlHeader`` next to the length of the
    ST.26 ``ST26SequenceListing`` tag.
    """
    rows = self.generalInformationRows
    raw_header_length = cu.safeLength(self.seql_raw.seqlHeader)
    clean_header_length = cu.safeLength(
        self.seql.generalInformation.seqlHeader)
    root_tag_length = cu.TAG_LENGTH_ST26['ST26SequenceListing']

    row = [0, 0, raw_header_length, clean_header_length]
    # tag length and total length are the same figure for this row
    row.extend([root_tag_length, root_tag_length])
    row.extend(['ST26SequenceListing', 'ST.25 seqlHeader discarded'])
    rows.append(row)
 def setRow_header(self):
     """Record the header row in ``self.generalInformationRows``.

     Columns 3 and 4 are the lengths of the header as found on
     ``self.seql_raw`` and on ``self.seql.generalInformation``; columns 5
     and 6 both carry the ``ST26SequenceListing`` tag length.
     """
     rows = self.generalInformationRows
     header_lengths = [
         cu.safeLength(self.seql_raw.seqlHeader),
         cu.safeLength(self.seql.generalInformation.seqlHeader),
     ]
     tag_length = cu.TAG_LENGTH_ST26['ST26SequenceListing']
     rows.append([0, 0] + header_lengths + [tag_length, tag_length] +
                 ['ST26SequenceListing', 'ST.25 seqlHeader discarded'])
 def createQualifierValue(tag_st25, element_st25, value_st25, msg):
     """Append one ``INSDQualifier_value`` row to ``res``.

     ``res`` and ``currentSeqId`` are not parameters; both are read from the
     enclosing scope.
     """
     element_length = cu.safeLength(element_st25)
     value_length = cu.safeLength(value_st25)
     tag_length = cu.TAG_LENGTH_ST26['INSDQualifier_value']

     res.append([
         tag_st25, currentSeqId,
         element_length, value_length,
         tag_length, value_length + tag_length,
         'INSDQualifier_value', msg,
     ])
    def _getSt25St26Lengths(self, element_st25_tag, seqIdNo, element_st25,
                            value_st25, element_st26, comment):
        """Build one size-comparison row for an ST.25/ST.26 element pair.

        Returns a list of eight items: the ST.25 tag, the sequence id, the
        length of ``element_st25``, the length of ``value_st25``, the ST.26
        tag length, the ST.26 tag length plus the value length, the ST.26
        element name and the comment. Both ST.26 figures are 0 when
        ``element_st26`` is ``'-'``.
        """
        element_length = cu.safeLength(element_st25)
        value_length = cu.safeLength(value_st25)

        if element_st26 == '-':
            # no ST.26 element given: nothing is counted on the ST.26 side
            tag_length = 0
            total_length = 0
        else:
            tag_length = cu.TAG_LENGTH_ST26[element_st26]
            total_length = tag_length + value_length

        return [element_st25_tag, seqIdNo, element_length, value_length,
                tag_length, total_length, element_st26, comment]
            def createQualifierValue(tag_st25, element_st25, value_st25, msg):
                """Add an ``INSDQualifier_value`` row to ``res``.

                Uses ``res`` and ``currentSeqId`` from the enclosing scope.
                """
                st25_element_length = cu.safeLength(element_st25)
                st25_value_length = cu.safeLength(value_st25)
                value_tag_length = cu.TAG_LENGTH_ST26['INSDQualifier_value']

                row = [tag_st25, currentSeqId]
                row += [st25_element_length, st25_value_length]
                row += [value_tag_length, st25_value_length + value_tag_length]
                row += ['INSDQualifier_value', msg]
                res.append(row)
 def _getSt25St26Lengths(self, element_st25_tag, seqIdNo, element_st25,
                         value_st25, element_st26, comment):
     """Return the eight-column row comparing an ST.25 item with ST.26.

     Columns: ST.25 tag, sequence id, length of ``element_st25``, length of
     ``value_st25``, ST.26 tag length, ST.26 tag length plus value length,
     ST.26 element name, comment. When ``element_st26`` is ``'-'`` the two
     ST.26 length columns are 0.
     """
     st25_element_length = cu.safeLength(element_st25)
     st25_value_length = cu.safeLength(value_st25)

     # defaults apply when no ST.26 element is given ('-')
     st26_tag_length = 0
     st26_total_length = 0
     if element_st26 != '-':
         st26_tag_length = cu.TAG_LENGTH_ST26[element_st26]
         st26_total_length = st26_tag_length + st25_value_length

     row = [element_st25_tag, seqIdNo]
     row += [st25_element_length, st25_value_length]
     row += [st26_tag_length, st26_total_length]
     row += [element_st26, comment]
     return row
 def setRow_prio(self):
     """Append the priority row to ``self.generalInformationRows``.

     Only the first entry of ``self.seql.generalInformation.priority`` is
     measured (its items 0 and 1); when there is no priority the value
     length is 0.
     """
     raw_priorities_length = cu.safeLength(self.seql_raw.priorities)

     priorities = self.seql.generalInformation.priority
     if priorities:
         first_priority = priorities[0]
         application_number = first_priority[0]
         filing_date = first_priority[1]
         value_length = (cu.safeLength(application_number) +
                         cu.safeLength(filing_date))
     else:
         value_length = 0

     # the ST.26 side is the sum of the four tags that wrap a priority
     tags_length = (
         cu.TAG_LENGTH_ST26['EarliestPriorityApplicationIdentification'] +
         cu.TAG_LENGTH_ST26['IPOfficeCode'] +
         cu.TAG_LENGTH_ST26['ApplicationNumberText'] +
         cu.TAG_LENGTH_ST26['FilingDate'])

     self.generalInformationRows.append([
         'prio', 0, raw_priorities_length,
         value_length, tags_length, value_length + tags_length,
         'EarliestPriorityApplicationIdentification',
         'only first ST.25 priority retained, if any',
     ])
    def setRow_prio(self):
        """Add the row describing the priority to ``generalInformationRows``.

        The value length is taken from items 0 and 1 of the first entry of
        ``self.seql.generalInformation.priority``; it stays 0 when that
        collection is empty or falsy.
        """
        row = ['prio', 0, cu.safeLength(self.seql_raw.priorities)]

        value_length = 0
        clean_priorities = self.seql.generalInformation.priority
        if clean_priorities:
            first = clean_priorities[0]
            appl_nr, filing_date = first[0], first[1]
            value_length = cu.safeLength(appl_nr) + cu.safeLength(filing_date)

        tag_names = (
            'EarliestPriorityApplicationIdentification',
            'IPOfficeCode',
            'ApplicationNumberText',
            'FilingDate',
        )
        tags_length = sum(cu.TAG_LENGTH_ST26[name] for name in tag_names)

        row.extend([value_length, tags_length, value_length + tags_length])
        row.append('EarliestPriorityApplicationIdentification')
        row.append('only first ST.25 priority retained, if any')

        self.generalInformationRows.append(row)
 def test_safeLength(self):
     """Check ``cu.safeLength`` for None, a short string and a multi-line string."""
     expectations = [
         (0, None),
         (3, 'abc'),
         (20, '<400>  40\r\n\r\nMet Ser'),
     ]
     for expected_length, value in expectations:
         self.assertEqual(expected_length, cu.safeLength(value))
Exemple #10
0
    def setSequenceRows(self):
        """Build the size-comparison rows for every sequence.

        Walks ``self.seql_raw.raw_sequences`` in step with the sequences
        yielded by ``self.seql.generateSequence()`` and, for each pair,
        emits rows for the ST.25 tags 210-223 and 400 as well as rows for
        ST.26 elements added without an ST.25 counterpart (``SequenceData``,
        ``INSDSeq``, the ``source`` feature and its qualifiers).

        Each row is an eight-item list: ST.25 tag (0 when there is none),
        sequence id, length of the raw ST.25 element, length of the parsed
        ST.25 value, ST.26 tag length, ST.26 tag length plus value length,
        ST.26 element name, comment.

        Side effect: ``self.seql.quantity_prt``, ``quantity_nuc`` and
        ``quantity_mix`` are incremented while the parsed sequences are
        collected.

        Returns:
            list: the rows in the order they were generated.

        Raises:
            IndexError: if a raw sequence has no parsed sequence at the
                same position.
        """
        res = []

        parsedSequences = []
        for s in self.seql.generateSequence():
            parsedSequences.append(s)
            # TODO: test
            if s.molType == 'PRT':
                self.seql.quantity_prt += 1
            else:
                self.seql.quantity_nuc += 1
                if s.mixedMode:
                    self.seql.quantity_mix += 1

        # enumerate() gives the position directly; looking it up with
        # list.index() is quadratic and yields the first match when two raw
        # sequences compare equal.
        for currentIndex, seq in enumerate(self.seql_raw.raw_sequences):
            parsedSequence = parsedSequences[currentIndex]
            currentSeqId = parsedSequence.seqIdNo

            # ====================== 210 ======================
            res.append(self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'SequenceData',
                'ST.26 specific element'))

            res.append(self._getSt25St26Lengths(
                210, currentSeqId, seq.seqIdNo, parsedSequence.seqIdNo,
                'sequenceIDNumber', '-'))

            res.append(self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'INSDSeq',
                'ST.26 specific element'))

            # ====================== 211 ======================
            res.append(self._getSt25St26Lengths(
                211, currentSeqId, seq.length, parsedSequence.length,
                'INSDSeq_length', cu.BLANK_PLACEHOLDER))

            # ====================== 212 ======================
            moltypeValue = ('AA' if parsedSequence.molType == 'PRT'
                            else parsedSequence.molType)

            # built by hand: the ST.26 total uses the replaced value
            # (moltypeValue) while the ST.25 columns use the original one
            res.append([
                212, currentSeqId,
                cu.safeLength(seq.molType),
                cu.safeLength(parsedSequence.molType),
                cu.TAG_LENGTH_ST26['INSDSeq_moltype'],
                cu.safeLength(moltypeValue) +
                cu.TAG_LENGTH_ST26['INSDSeq_moltype'],
                'INSDSeq_moltype',
                ('PRT replaced by AA for protein raw_sequences'
                 if moltypeValue == 'AA' else cu.BLANK_PLACEHOLDER),
            ])

            # ====================== INSDSeq_division ======================
            INSDSeq_division_val = 'PAT'
            res.append(self._getSt25St26Lengths(
                0, currentSeqId, '-', INSDSeq_division_val,
                'INSDSeq_division', 'ST.26 specific element'))

            # ====================== INSDSeq_other-seqids ======================
            # optional element, therefore not included in calculations

            # ====================== INSDSeq_feature-table ======================
            res.append(self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'INSDSeq_feature-table',
                'ST.26 specific element'))

            # ====================== 213 ======================
            # create ST.26 feature source
            res.append([
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDFeature'],
                cu.TAG_LENGTH_ST26['INSDFeature'],
                'INSDFeature',
                'ST.26 mandatory feature source',
            ])

            res.append([
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDFeature_key'],
                len('source') + cu.TAG_LENGTH_ST26['INSDFeature_key'],
                'INSDFeature_key',
                'ST.26 mandatory feature source',
            ])

            sourceLocation = '1..%s' % parsedSequence.length
            res.append([
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDFeature_location'],
                len(sourceLocation) +
                cu.TAG_LENGTH_ST26['INSDFeature_location'],
                'INSDFeature_location',
                'ST.26 mandatory feature source',
            ])

            def append_INSDFeature_quals(msg):
                """Append an ``INSDFeature_quals`` row for this sequence."""
                res.append([
                    0, currentSeqId, 0, 0,
                    cu.TAG_LENGTH_ST26['INSDFeature_quals'],
                    cu.TAG_LENGTH_ST26['INSDFeature_quals'],
                    'INSDFeature_quals',
                    msg,
                ])

            # add first the parent element INSDFeature_quals
            append_INSDFeature_quals('ST.26 mandatory feature source')

            def createQualifier(name, msg):
                """Append the ``INSDQualifier`` and ``INSDQualifier_name`` rows."""
                res.append([
                    0, currentSeqId, 0, 0,
                    cu.TAG_LENGTH_ST26['INSDQualifier'],
                    cu.TAG_LENGTH_ST26['INSDQualifier'],
                    'INSDQualifier',
                    msg,
                ])

                res.append([
                    0, currentSeqId, 0, 0,
                    cu.TAG_LENGTH_ST26['INSDQualifier_name'],
                    len(name) + cu.TAG_LENGTH_ST26['INSDQualifier_name'],
                    'INSDQualifier_name',
                    msg,
                ])

            def createQualifierValue(tag_st25, element_st25, value_st25, msg):
                """Append the ``INSDQualifier_value`` row for an ST.25 value."""
                res.append([
                    tag_st25, currentSeqId,
                    cu.safeLength(element_st25),
                    cu.safeLength(value_st25),
                    cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                    cu.safeLength(value_st25) +
                    cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                    'INSDQualifier_value',
                    msg,
                ])

            # qualifier organism
            createQualifier('organism', 'ST.26 mandatory qualifier organism')
            createQualifierValue(213, seq.organism,
                                 parsedSequence.organism,
                                 'ST.26 mandatory qualifier organism')

            # qualifier mol_type
            mol_typeValue = ('protein' if parsedSequence.molType == 'PRT'
                             else 'genomic DNA')
            createQualifier('mol_type', 'ST.26 mandatory qualifier mol_type')
            # built by hand rather than through createQualifierValue so that
            # both ST.25 length columns stay 0
            res.append([
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                cu.safeLength(mol_typeValue) +
                cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                'INSDQualifier_value',
                'ST.26 mandatory qualifier mol_type',
            ])
            # end create ST.26 feature source

            # ====================== other features ======================
            parsedFeatures = parsedSequence.features
            # same reasoning as for the sequences: pair raw and parsed
            # features by position instead of searching with list.index()
            for currentFeatureIndex, feat in enumerate(seq.features):
                parsedFeature = parsedFeatures[currentFeatureIndex]
                # a feature with neither key nor location only contributes
                # its description (223) below
                isSimpleFeature = (
                    parsedFeature.key == cu.BLANK_PLACEHOLDER and
                    parsedFeature.location == cu.BLANK_PLACEHOLDER)
                if not isSimpleFeature:
                    # ====================== 220 ======================
                    res.append(self._getSt25St26Lengths(
                        220, currentSeqId,
                        feat.featureHeader, parsedFeature.featureHeader,
                        'INSDFeature', cu.BLANK_PLACEHOLDER))

                    # ====================== 221 ======================
                    res.append(self._getSt25St26Lengths(
                        221, currentSeqId,
                        feat.key, parsedFeature.key,
                        'INSDFeature_key', cu.BLANK_PLACEHOLDER))

                    # ====================== add row for mixed mode translation qualifier ======================
                    if parsedFeature.key == 'CDS':
                        createQualifier('translation',
                                        'ST.26 specific element translation')
                        res.append([
                            400, currentSeqId,
                            0,
                            cu.safeLength(parsedFeature.translation),
                            cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                            (cu.TAG_LENGTH_ST26['INSDQualifier_value'] +
                             len(cu.oneLetterCode(parsedFeature.translation))),
                            'INSDQualifier_value', '3-to-1 letter code',
                        ])

                    # ====================== 222 ======================
                    res.append(self._getSt25St26Lengths(
                        222, currentSeqId,
                        feat.location, parsedFeature.location,
                        'INSDFeature_location', cu.BLANK_PLACEHOLDER))

                # ====================== 223 ======================
                # do not add row if 223 missing!
                if parsedFeature.description != cu.BLANK_PLACEHOLDER:
                    append_INSDFeature_quals('ST.26 mandatory element')
                    createQualifier('note', cu.BLANK_PLACEHOLDER)
                    createQualifierValue(223, feat.description,
                                         parsedFeature.description,
                                         cu.BLANK_PLACEHOLDER)

            # ====================== 400 ======================
            if parsedSequence.molType == 'PRT':
                parsedResidues = parsedSequence.residues_prt
                # the ST.26 total is measured on the one-letter form
                currentRow400 = [
                    400, currentSeqId,
                    cu.safeLength(seq.residues),
                    cu.safeLength(parsedResidues),
                    cu.TAG_LENGTH_ST26['INSDSeq_sequence'],
                    (cu.TAG_LENGTH_ST26['INSDSeq_sequence'] +
                     len(cu.oneLetterCode(parsedResidues))),
                    'INSDSeq_sequence', '3-to-1 letter code',
                ]
            else:
                parsedResidues = parsedSequence.residues_nuc
                currentRow400 = self._getSt25St26Lengths(
                    400, currentSeqId,
                    seq.residues, parsedResidues,
                    'INSDSeq_sequence', cu.BLANK_PLACEHOLDER)
            res.append(currentRow400)

        return res
Exemple #11
0
    def setSequenceRows(self):
        """Return the size-comparison rows for all sequences.

        The raw sequences in ``self.seql_raw.raw_sequences`` are paired by
        position with the sequences yielded by
        ``self.seql.generateSequence()``. For each pair the method emits
        rows for the ST.25 tags 210-223 and 400 and for the ST.26 elements
        added without an ST.25 counterpart (``SequenceData``, ``INSDSeq``,
        the ``source`` feature and its qualifiers).

        Every row has eight items: ST.25 tag (0 when there is none),
        sequence id, length of the raw ST.25 element, length of the parsed
        ST.25 value, ST.26 tag length, ST.26 tag length plus value length,
        ST.26 element name, comment.

        Side effect: the counters ``quantity_prt``, ``quantity_nuc`` and
        ``quantity_mix`` on ``self.seql`` are incremented while the parsed
        sequences are collected.

        Returns:
            list: the generated rows, in generation order.

        Raises:
            IndexError: if a raw sequence has no parsed sequence at the
                same position.
        """
        res = []

        parsedSequences = []
        for s in self.seql.generateSequence():
            parsedSequences.append(s)
            # TODO: test
            if s.molType == 'PRT':
                self.seql.quantity_prt += 1
            else:
                self.seql.quantity_nuc += 1
                if s.mixedMode:
                    self.seql.quantity_mix += 1

        # Pair raw and parsed sequences by position. list.index() would
        # rescan the list on every iteration and return the first match
        # when two raw sequences compare equal.
        for currentIndex, seq in enumerate(self.seql_raw.raw_sequences):
            parsedSequence = parsedSequences[currentIndex]
            currentSeqId = parsedSequence.seqIdNo

            # ====================== 210 ======================
            currentRow_SequenceData = self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'SequenceData',
                'ST.26 specific element')
            res.append(currentRow_SequenceData)

            currentRow210 = self._getSt25St26Lengths(
                210, currentSeqId, seq.seqIdNo, parsedSequence.seqIdNo,
                'sequenceIDNumber', '-')
            res.append(currentRow210)

            currentRow_INSDSeq = self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'INSDSeq', 'ST.26 specific element')
            res.append(currentRow_INSDSeq)

            # ====================== 211 ======================
            currentRow211 = self._getSt25St26Lengths(
                211, currentSeqId, seq.length, parsedSequence.length,
                'INSDSeq_length', cu.BLANK_PLACEHOLDER)
            res.append(currentRow211)

            # ====================== 212 ======================
            if parsedSequence.molType == 'PRT':
                moltypeValue = 'AA'
            else:
                moltypeValue = parsedSequence.molType

            # built by hand: the ST.26 total uses the replaced value
            # (moltypeValue) while the ST.25 columns use the original one
            currentRow212 = [
                212, currentSeqId,
                cu.safeLength(seq.molType),
                cu.safeLength(parsedSequence.molType),
                cu.TAG_LENGTH_ST26['INSDSeq_moltype'],
                cu.safeLength(moltypeValue) +
                cu.TAG_LENGTH_ST26['INSDSeq_moltype'],
                'INSDSeq_moltype',
                ('PRT replaced by AA for protein raw_sequences'
                 if moltypeValue == 'AA' else cu.BLANK_PLACEHOLDER),
            ]
            res.append(currentRow212)

            # ====================== INSDSeq_division ======================
            INSDSeq_division_val = 'PAT'
            currentRow_INSDSeq_division = self._getSt25St26Lengths(
                0, currentSeqId, '-', INSDSeq_division_val, 'INSDSeq_division',
                'ST.26 specific element')
            res.append(currentRow_INSDSeq_division)

            # ====================== INSDSeq_other-seqids ======================
            # optional element, therefore not included in calculations

            # ====================== INSDSeq_feature-table ======================
            currentRow_INSDSeq_feature_table = self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'INSDSeq_feature-table',
                'ST.26 specific element')
            res.append(currentRow_INSDSeq_feature_table)

            # ====================== 213 ======================
            # create ST.26 feature source
            currentRow_INSDFeature = [
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDFeature'],
                cu.TAG_LENGTH_ST26['INSDFeature'],
                'INSDFeature', 'ST.26 mandatory feature source',
            ]
            res.append(currentRow_INSDFeature)

            currentRow_INSDFeature_key = [
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDFeature_key'],
                len('source') + cu.TAG_LENGTH_ST26['INSDFeature_key'],
                'INSDFeature_key', 'ST.26 mandatory feature source',
            ]
            res.append(currentRow_INSDFeature_key)

            sourceLocation = '1..%s' % parsedSequence.length
            currentRow_INSDFeature_location = [
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDFeature_location'],
                len(sourceLocation) +
                cu.TAG_LENGTH_ST26['INSDFeature_location'],
                'INSDFeature_location', 'ST.26 mandatory feature source',
            ]
            res.append(currentRow_INSDFeature_location)

            def append_INSDFeature_quals(msg):
                """Append an ``INSDFeature_quals`` row for this sequence."""
                res.append([
                    0, currentSeqId, 0, 0,
                    cu.TAG_LENGTH_ST26['INSDFeature_quals'],
                    cu.TAG_LENGTH_ST26['INSDFeature_quals'],
                    'INSDFeature_quals', msg,
                ])

            # add first the parent element INSDFeature_quals
            append_INSDFeature_quals('ST.26 mandatory feature source')

            def createQualifier(name, msg):
                """Append the ``INSDQualifier`` and ``INSDQualifier_name`` rows."""
                currentRow_INSDQualifier = [
                    0, currentSeqId, 0, 0,
                    cu.TAG_LENGTH_ST26['INSDQualifier'],
                    cu.TAG_LENGTH_ST26['INSDQualifier'],
                    'INSDQualifier', msg,
                ]
                res.append(currentRow_INSDQualifier)

                currentRow_INSDQualifier_name = [
                    0, currentSeqId, 0, 0,
                    cu.TAG_LENGTH_ST26['INSDQualifier_name'],
                    len(name) + cu.TAG_LENGTH_ST26['INSDQualifier_name'],
                    'INSDQualifier_name', msg,
                ]
                res.append(currentRow_INSDQualifier_name)

            def createQualifierValue(tag_st25, element_st25, value_st25, msg):
                """Append the ``INSDQualifier_value`` row for an ST.25 value."""
                currentRow_INSDQualifier_value = [
                    tag_st25, currentSeqId,
                    cu.safeLength(element_st25),
                    cu.safeLength(value_st25),
                    cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                    cu.safeLength(value_st25) +
                    cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                    'INSDQualifier_value', msg,
                ]
                res.append(currentRow_INSDQualifier_value)

            # qualifier organism
            createQualifier('organism', 'ST.26 mandatory qualifier organism')
            createQualifierValue(213, seq.organism, parsedSequence.organism,
                                 'ST.26 mandatory qualifier organism')

            # qualifier mol_type
            if parsedSequence.molType == 'PRT':
                mol_typeValue = 'protein'
            else:
                mol_typeValue = 'genomic DNA'
            createQualifier('mol_type', 'ST.26 mandatory qualifier mol_type')
            # built by hand rather than through createQualifierValue so that
            # both ST.25 length columns stay 0
            res.append([
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                cu.safeLength(mol_typeValue) +
                cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                'INSDQualifier_value', 'ST.26 mandatory qualifier mol_type',
            ])
            # end create ST.26 feature source

            # ====================== other features ======================
            parsedFeatures = parsedSequence.features
            # raw and parsed features are paired by position as well, again
            # without list.index()
            for currentFeatureIndex, feat in enumerate(seq.features):
                parsedFeature = parsedFeatures[currentFeatureIndex]
                # a feature with neither key nor location only contributes
                # its description (223) below
                isSimpleFeature = (
                    parsedFeature.key == cu.BLANK_PLACEHOLDER and
                    parsedFeature.location == cu.BLANK_PLACEHOLDER)
                if not isSimpleFeature:
                    # ====================== 220 ======================
                    currentRow220 = self._getSt25St26Lengths(
                        220, currentSeqId, feat.featureHeader,
                        parsedFeature.featureHeader, 'INSDFeature',
                        cu.BLANK_PLACEHOLDER)
                    res.append(currentRow220)

                    # ====================== 221 ======================
                    currentRow221 = self._getSt25St26Lengths(
                        221, currentSeqId, feat.key, parsedFeature.key,
                        'INSDFeature_key', cu.BLANK_PLACEHOLDER)
                    res.append(currentRow221)

                    # ====================== add row for mixed mode translation qualifier ======================
                    if parsedFeature.key == 'CDS':
                        createQualifier('translation',
                                        'ST.26 specific element translation')
                        translationRow = [
                            400, currentSeqId, 0,
                            cu.safeLength(parsedFeature.translation),
                            cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                            (cu.TAG_LENGTH_ST26['INSDQualifier_value'] +
                             len(cu.oneLetterCode(parsedFeature.translation))),
                            'INSDQualifier_value', '3-to-1 letter code',
                        ]
                        res.append(translationRow)

                    # ====================== 222 ======================
                    currentRow222 = self._getSt25St26Lengths(
                        222, currentSeqId, feat.location,
                        parsedFeature.location, 'INSDFeature_location',
                        cu.BLANK_PLACEHOLDER)
                    res.append(currentRow222)

                # ====================== 223 ======================
                # do not add row if 223 missing!
                if parsedFeature.description != cu.BLANK_PLACEHOLDER:
                    append_INSDFeature_quals('ST.26 mandatory element')
                    createQualifier('note', cu.BLANK_PLACEHOLDER)
                    createQualifierValue(223, feat.description,
                                         parsedFeature.description,
                                         cu.BLANK_PLACEHOLDER)

            # ====================== 400 ======================
            if parsedSequence.molType == 'PRT':
                parsedResidues = parsedSequence.residues_prt
                # the ST.26 total is measured on the one-letter form
                currentRow400 = [
                    400, currentSeqId,
                    cu.safeLength(seq.residues),
                    cu.safeLength(parsedResidues),
                    cu.TAG_LENGTH_ST26['INSDSeq_sequence'],
                    (cu.TAG_LENGTH_ST26['INSDSeq_sequence'] +
                     len(cu.oneLetterCode(parsedResidues))),
                    'INSDSeq_sequence', '3-to-1 letter code',
                ]
            else:
                parsedResidues = parsedSequence.residues_nuc
                currentRow400 = self._getSt25St26Lengths(
                    400, currentSeqId, seq.residues, parsedResidues,
                    'INSDSeq_sequence', cu.BLANK_PLACEHOLDER)
            res.append(currentRow400)

        return res