def get_fasta(self, id_):
    """Return the FASTA string for a valid identifier.

    A fresh ``FASTA`` helper is created on every call, asked to load
    ``id_``, and its ``fasta`` attribute is handed back to the caller.

    :param id_: identifier forwarded unchanged to ``FASTA.load_fasta``
    :return: the ``fasta`` attribute of the loaded helper

    .. seealso:: :mod:`bioservices.apps.fasta` for dedicated tools to
        manipulate FASTA
    """
    # The helper class is imported at function scope, only when called.
    from bioservices.apps.fasta import FASTA

    loader = FASTA()
    loader.load_fasta(id_)
    return loader.fasta
def get_fasta_sequence(self, id_):
    """Return the bare sequence of a FASTA entry (not the FASTA text).

    :param str id_: Should be the entry name
    :return: the sequence (string)

    .. warning:: The value returned is the sequence found in a FASTA file,
        not the FASTA content itself: the header is removed and so are the
        end-of-line breaks inserted every 60 characters.
    """
    # The helper class is imported at function scope, only when called.
    from bioservices.apps.fasta import FASTA

    loader = FASTA()
    loader.load_fasta(id_)
    sequence = loader.sequence
    return sequence
def test_fasta():
    """Smoke-test the ``FASTA`` helper: loading, attributes, save and read.

    The test only checks that none of the calls or attribute accesses
    raise; it makes no assertion on the returned values.
    """
    import os

    fasta = FASTA()
    fasta.load_fasta(None)
    fasta.load_fasta("P43403")
    fasta.load_fasta("P43403")  # already there

    # Touch every public attribute; an exception here fails the test.
    fasta.header
    fasta.gene_name
    fasta.sequence
    fasta.fasta
    fasta.identifier

    # Only a unique file name is needed, so the handle is closed right away
    # and the file is written and re-read through its path.
    fh = tempfile.NamedTemporaryFile(delete=False)
    fh.close()
    try:
        fasta.save_fasta(fh.name)
        fasta.read_fasta(fh.name)
    finally:
        # Remove the file explicitly: flipping ``fh.delete`` after creation
        # is not part of the documented tempfile API and is not honoured by
        # ``close()`` on Python 3, so the file used to be left on disk.
        os.remove(fh.name)
def setup_class(klass):
    """Attach a ``FASTA`` instance built with ``verbose=False`` to *klass*.

    The instance is stored as the class attribute ``s``.
    """
    instance = FASTA(verbose=False)
    klass.s = instance
from bioservices import UniProt from bioservices import PDB from bioservices.apps.fasta import FASTA from bioservices import ChEMBL import os # Leos from uniprot id to the protein name f = FASTA() u = UniProt(cache=True, verbose=False) s = PDB() c = ChEMBL(verbose=False) """ pdb chain to uniprot id: ftp.ebi.ac.uk/pub/databases/msd/sifts/csv/pdb_chain_uniprot.csv import requests from xml.etree.ElementTree import fromstring pdb_id = '4hhb.A' pdb_mapping_url = 'http://www.rcsb.org/pdb/rest/das/pdb_uniprot_mapping/alignment' uniprot_url = 'http://www.uniprot.org/uniprot/{}.xml' def get_uniprot_accession_id(response_xml): root = fromstring(response_xml) return next( el for el in root.getchildren()[0].getchildren() if el.attrib['dbSource'] == 'UniProt' ).attrib['dbAccessionId'] def get_uniprot_protein_name(uniport_id): uinprot_response = requests.get( uniprot_url.format(uniport_id) ).text