def query_uniprot(gene):
    """Search UniProt for reviewed human (organism 9606) entries of a gene.

    The search response is split on newlines; the first and the last
    elements are discarded (presumably the column header and the empty
    string left by a trailing newline -- confirm against the service).

    :param gene: exact gene name, concatenated into the query string.
    :return: the single remaining line as a string when exactly one line
        remains, otherwise the list of remaining lines (possibly empty).
    """
    service = up.UniProt()
    query = "gene_exact:" + gene + "+AND+reviewed:yes+AND+organism:9606"
    lines = service.search(query, columns="id").split("\n")
    hits = lines[1:-1]
    if len(hits) != 1:
        return hits
    # Exactly one hit: hand back the bare string instead of a 1-item list.
    return lines[1]
def test_muscle():
    """Run a MUSCLE job end to end through the camelCase service API.

    Two FASTA records are fetched from UniProt, concatenated and submitted;
    the job is then polled, waited on and its 'phylotree' result fetched.
    The test makes no assertions: it passes as long as no call raises.
    """
    service = MUSCLE(verbose=False)
    # Attribute access only; the value is not used.
    service.parameters
    service.getParametersDetails("format")

    client = uniprot.UniProt(verbose=False)
    first_fasta = client.get_fasta("P18812")
    second_fasta = client.get_fasta("P18813")
    sequences = first_fasta + second_fasta

    job = service.run(
        frmt="fasta",
        sequence=sequences,
        email="*****@*****.**",
    )
    service.getStatus(job)
    service.wait(job)
    service.getResultTypes(job)
    service.getResult(job, 'phylotree')
def test_muscle():
    """Run a MUSCLE job end to end through the snake_case service API.

    Checks that asking for the details of an unknown parameter raises,
    then fetches two FASTA records from UniProt, submits them as one job,
    polls it, waits for it and retrieves its 'phylotree' result.

    :raises AssertionError: if ``get_parameter_details`` accepts the
        invalid parameter name without raising.
    """
    m = MUSCLE(verbose=False)
    # Attribute access only; the value is not used.
    m.parameters
    m.get_parameter_details("format")

    # An invalid parameter name must be rejected. The success path lives in
    # ``else`` so that the failure signal cannot be swallowed by the handler
    # (a bare ``except`` around ``assert False`` would catch the
    # AssertionError itself and make this check always pass).
    try:
        m.get_parameter_details("formattt")
    except Exception:
        pass
    else:
        raise AssertionError(
            "get_parameter_details('formattt') should have raised")

    u = uniprot.UniProt(verbose=False)
    f1 = u.get_fasta("P18812")
    f2 = u.get_fasta("P18813")

    jobid = m.run(frmt="fasta", sequence=f1 + f2, email="*****@*****.**")
    m.get_status(jobid)
    m.wait(jobid)
    m.get_result_types(jobid)
    m.get_result(jobid, 'phylotree')
def query_uniprot(gene):
    """Return the first result line for a reviewed human gene in UniProt.

    The search requests the ``id``, ``protein names`` and
    ``comment(FUNCTION)`` columns, restricted to reviewed entries of
    organism 9606.

    :param gene: exact gene name, concatenated into the query string.
    :return: the second line of the response (index 1); the first line is
        presumably the column header -- confirm against the service.
    :raises IndexError: if the response contains fewer than two lines.
    """
    client = up.UniProt()
    query = "gene_exact:" + gene + "+AND+reviewed:yes+AND+organism:9606"
    wanted_columns = "id, protein names, comment(FUNCTION)"
    response = client.search(query, columns=wanted_columns)
    rows = response.split("\n")
    return rows[1]
def query_uniprot(gene):
    """Look up the UniProt id line of a reviewed human (9606) gene.

    :param gene: exact gene name, concatenated into the query string.
    :return: the second line of the newline-split response (index 1); the
        first line is presumably the column header -- confirm against the
        service.
    :raises IndexError: if the response contains fewer than two lines.
    """
    service = up.UniProt()
    criteria = ("gene_exact:" + gene, "reviewed:yes", "organism:9606")
    # Criteria are joined with the literal "+AND+" separator of the query.
    answer = service.search("+AND+".join(criteria), columns="id")
    _, first_hit = answer.split("\n")[:2]
    return first_hit
def __init__(self, folder, requests_per_sec=10, padj_threshold=0.05,
             log2_fc_threshold=0, fc_threshold=None,
             pattern="*complete*.xls"):
    """Create the service clients and select the genes of interest.

    :param folder: forwarded to ``sequana.rnadiff.RNADiffResults``.
    :param requests_per_sec: value assigned to the ``requests_per_sec``
        attribute of the QuickGO and UniProt clients.
    :param padj_threshold: rows whose ``padj`` exceeds this value are
        discarded.
    :param log2_fc_threshold: minimal absolute ``log2FoldChange`` for a
        row to be kept; must be >= 0.
    :param fc_threshold: optional threshold on a linear scale. When not
        None, its log2 replaces ``log2_fc_threshold``.
    :param pattern: forwarded to ``RNADiffResults``.
    :raises AssertionError: if ``log2_fc_threshold`` is negative.

    After construction, ``mygenes``, ``mygenes_up`` and ``mygenes_down``
    are lists of index labels ordered by increasing ``padj`` with any
    "gene:" substring removed.
    """
    # NOTE(review): this check runs before fc_threshold is converted, so a
    # fc_threshold below 1 yields a negative log2 threshold that is not
    # rejected here -- confirm whether that is intended.
    assert log2_fc_threshold >= 0, "log2 fc_threshold must be >=0"
    if fc_threshold is not None:
        log2_fc_threshold = pylab.log2(fc_threshold)

    from bioservices import panther, quickgo, uniprot

    self.panther = panther.Panther()
    self.valid_taxons = [
        x['taxon_id'] for x in self.panther.get_supported_genomes()
    ]

    self.quickgo = quickgo.QuickGO(cache=True)
    self.uniprot = uniprot.UniProt(cache=True)
    self.quickgo.requests_per_sec = requests_per_sec
    self.uniprot.requests_per_sec = requests_per_sec

    self.ancestors = {
        "MF": "GO:0003674",
        "CC": "GO:0005575",
        "BP": "GO:0008150",
    }
    self.aspects = {"MF": "molecular_function"}

    # ontologies and ontology_aliases have the same length; presumably they
    # are paired by position -- verify against the code that uses them.
    self.ontologies = [
        'GO:0003674',
        'GO:0008150',
        'GO:0005575',
        'ANNOT_TYPE_ID_PANTHER_GO_SLIM_MF',
        'ANNOT_TYPE_ID_PANTHER_GO_SLIM_BP',
        'ANNOT_TYPE_ID_PANTHER_GO_SLIM_CC',
        'ANNOT_TYPE_ID_PANTHER_PC',
        'ANNOT_TYPE_ID_PANTHER_PATHWAY',
        'ANNOT_TYPE_ID_REACTOME_PATHWAY',
    ]
    self.ontology_aliases = [
        "MF",
        "BP",
        "CC",
        'SLIM_MF',
        'SLIM_BP',
        'SLIM_CC',
        'PROTEIN',
        'PANTHER_PATHWAY',
        'REACTOME_PATHWAY',
    ]

    from sequana.rnadiff import RNADiffResults

    self.rnadiff = RNADiffResults(folder, pattern=pattern)

    logger.info(
        "Ignoring pvalue adjusted > {} and fold change in [{}, {}]".format(
            padj_threshold, 1 / (2**log2_fc_threshold),
            2**log2_fc_threshold))

    # The query strings below reference the locals ``padj_threshold`` and
    # ``fc_threshold`` through the ``@`` prefix, so both names must exist in
    # this frame; fc_threshold is rebound to the log2-scale value.
    fc_threshold = log2_fc_threshold
    selected = self.rnadiff.df.query(
        "padj<=@padj_threshold and (log2FoldChange<=-@fc_threshold or log2FoldChange>=@fc_threshold)"
    )
    selected_down = self.rnadiff.df.query(
        "padj<=@padj_threshold and log2FoldChange<=-@fc_threshold")
    selected_up = self.rnadiff.df.query(
        "padj<=@padj_threshold and log2FoldChange>=@fc_threshold")

    def _gene_ids(frame):
        # Index labels ordered by increasing padj, "gene:" removed.
        # When using ENSEMBL, prefix "gene:" should be removed to be
        # understood by PantherDB
        return [
            x.replace("gene:", "") for x in frame.sort_values('padj').index
        ]

    self.mygenes = _gene_ids(selected)
    self.mygenes_down = _gene_ids(selected_down)
    self.mygenes_up = _gene_ids(selected_up)

    # Argument order follows the labels in the message: up first, then down.
    logger.info("Kept {} genes ({} up; {} down)".format(
        len(self.mygenes), len(self.mygenes_up), len(self.mygenes_down)))