def test_sequence_conversion(self):
    """Convert a populated PX.Sequence to a SeqRecord and back again.

    Only the two conversion calls are exercised; nothing about their
    results is asserted yet (see the TODO near the end).
    """
    # The pieces are built in the same order the nested call would
    # evaluate them: accession, mol_seq, annotations, domains.
    accession = PX.Accession("P81431", source="UniProtKB")
    residues = PX.MolSeq(
        "TDATGKPIKCMAAIAWEAKKPLSIEEVEVAPPKSGEVRIKILHSGVCHTD")
    annotations = [
        PX.Annotation(ref="EC:1.1.1.1"),
        PX.Annotation(ref="GO:0004022"),
    ]

    # Positional ProteinDomain arguments: value, start, end, confidence
    domain_specs = (
        ("FOO", 0, 5, 7.0e-26),
        ("BAR", 8, 13, 7.2e-117),
        ("A-OK", 21, 34, 2.4e-06),
        ("WD40", 40, 50, 0.3),
    )
    domains = []
    for value, start, end, confidence in domain_specs:
        domains.append(PX.ProteinDomain(value, start, end, confidence))
    architecture = PX.DomainArchitecture(length=50, domains=domains)

    # id_ref, id_source and location are not passed here.
    original = PX.Sequence(
        type="protein",
        symbol="ADHX",
        accession=accession,
        name="Alcohol dehydrogenase class-3",
        mol_seq=residues,
        uri=None,
        annotations=annotations,
        domain_architecture=architecture,
    )

    record = original.to_seqrecord()
    # TODO: check seqrec-specific traits (see args)
    #   Seq(letters, alphabet), id, name, description, features
    round_tripped = PX.Sequence.from_seqrecord(record)
def test_sequence_conversion(self):
    """Round-trip a PX.Sequence through to_seqrecord / from_seqrecord.

    The conversions are only run, not checked; the TODO below lists the
    SeqRecord traits that still need assertions.
    """
    # Rows of positional ProteinDomain arguments:
    # value, start, end, confidence
    domain_rows = (
        ('FOO', 0, 5, 7.0e-26),
        ('BAR', 8, 13, 7.2e-117),
        ('A-OK', 21, 34, 2.4e-06),
        ('WD40', 40, 50, 0.3),
    )

    # Dict values are evaluated top to bottom, matching the order in
    # which the keyword arguments would be evaluated inline.
    # id_ref, id_source and location are not supplied.
    fields = {
        'type': 'protein',
        'symbol': 'ADHX',
        'accession': PX.Accession('P81431', source='UniProtKB'),
        'name': 'Alcohol dehydrogenase class-3',
        'mol_seq': PX.MolSeq(
            'TDATGKPIKCMAAIAWEAKKPLSIEEVEVAPPKSGEVRIKILHSGVCHTD'),
        'uri': None,
        'annotations': [
            PX.Annotation(ref='EC:1.1.1.1'),
            PX.Annotation(ref='GO:0004022'),
        ],
        'domain_architecture': PX.DomainArchitecture(
            length=50,
            domains=[PX.ProteinDomain(*row) for row in domain_rows],
        ),
    }
    source_seq = PX.Sequence(**fields)

    seq_record = source_seq.to_seqrecord()
    # TODO: check seqrec-specific traits (see args)
    #   Seq(letters, alphabet), id, name, description, features
    restored_seq = PX.Sequence.from_seqrecord(seq_record)
def _parse_sequence(self, parent):
    """Build a PX.Sequence from a sequence element's children (PRIVATE).

    The Sequence is created from ``parent.attrib`` and filled in by
    consuming (event, element) pairs from ``self.context`` until the
    "end" event of a ``sequence`` tag is seen.
    """
    result = PX.Sequence(**parent.attrib)
    for event, node in self.context:
        ns, tag = _split_namespace(node.tag)
        if event != "end":
            # Only finished elements are handled.
            continue
        if tag == "sequence":
            # Closing tag of the sequence itself: release it and stop.
            parent.clear()
            break

        if tag == "name":
            result.name = _collapse_wspace(node.text)
        elif tag == "annotation":
            result.annotations.append(self.annotation(node))
        elif tag in ("symbol", "location"):
            # Stored as the element's raw text.
            setattr(result, tag, node.text)
        elif tag in ("accession", "mol_seq", "uri", "domain_architecture"):
            # Handled by the parser method named after the tag.
            handler = getattr(self, tag)
            setattr(result, tag, handler(node))
        elif ns != NAMESPACES["phy"]:
            # Unrecognised tag outside the "phy" namespace.
            result.other.append(self.other(node, ns, tag))
        # Clear the parent after every finished child element.
        parent.clear()
    return result
def _parse_sequence(self, parent):
    """Parse the children of a sequence element into a PX.Sequence.

    ``parent.attrib`` supplies the constructor keywords. Events are then
    read from ``self.context`` until a ``sequence`` tag ends, and the
    populated Sequence is returned.
    """
    # Tags whose value comes from the same-named method on this parser.
    method_tags = ('accession', 'mol_seq', 'uri', 'domain_architecture')
    # Tags whose value is the element text, taken verbatim.
    text_tags = ('symbol', 'location')

    seq = PX.Sequence(**parent.attrib)
    for event, child in self.context:
        namespace, tag = _split_namespace(child.tag)
        if event == 'end':
            if tag == 'sequence':
                parent.clear()
                break

            if tag in method_tags:
                parse = getattr(self, tag)
                setattr(seq, tag, parse(child))
            elif tag in text_tags:
                setattr(seq, tag, child.text)
            elif tag == 'annotation':
                seq.annotations.append(self.annotation(child))
            elif tag == 'name':
                seq.name = _collapse_wspace(child.text)
            elif namespace != NAMESPACES['phy']:
                # Unrecognised tag from outside the 'phy' namespace.
                extra = self.other(child, namespace, tag)
                seq.other.append(extra)

            # Runs after each finished child, whichever branch matched.
            parent.clear()
    return seq
# Distance matrix for the alignment, using the 'identity' model.
calculator = DistanceCalculator('identity')
dm = calculator.get_distance(aln)
print(dm)

# Distance-based tree built with the 'nj' method; it is handed to the
# parsimony constructor below.
constructor = DistanceTreeConstructor(calculator, 'nj')
tree = constructor.build_tree(aln)
print(tree)

# Parsimony tree search over the same alignment, given the tree above.
scorer = ParsimonyScorer()
searcher = NNITreeSearcher(scorer)
constructor = ParsimonyTreeConstructor(searcher, tree)
pars_tree = constructor.build_tree(aln)
egfr_phy = pars_tree.as_phyloxml()
print(pars_tree)
print(egfr_phy)
print(list(pars_tree.find_elements('Inner3')))

# Attach the aligned sequence to every terminal clade. `lookup` must map
# each terminal's name to its sequence string.
for clade in egfr_phy.get_terminals():
    key = clade.name
    accession = PhyloXML.Accession(key, 'NCBI')
    mol_seq = PhyloXML.MolSeq(lookup[key], is_aligned=True)
    # phyloXML restricts the sequence type to 'rna', 'dna' or 'protein'.
    # The previous value 'aa' (amino acid) is not in that set, so the
    # written file would not validate; 'protein' is the matching value.
    sequence = PhyloXML.Sequence(type='protein',
                                 accession=accession,
                                 mol_seq=mol_seq)
    clade.sequences.append(sequence)

Phylo.write(egfr_phy, 'egfr-family-annotated.xml', 'phyloxml')