Beispiel #1
0
def query_uniprot(gene):
    """Search UniProt for reviewed entries of organism 9606 matching *gene*.

    The search asks for the ``id`` column only and the raw text response is
    split on newlines.  The first element and the last element of that split
    are discarded (presumably a header line and the trailing empty string --
    TODO confirm against the service output).

    :param gene: gene name, concatenated into a ``gene_exact:`` query.
    :return: the single remaining line as a string when exactly one line is
        left, otherwise the list of remaining lines (which may be empty).
    """
    service = up.UniProt()
    query = "gene_exact:" + gene + "+AND+reviewed:yes+AND+organism:9606"
    lines = service.search(query, columns="id").split("\n")

    hits = lines[1:-1]
    if len(hits) != 1:
        return hits
    # Exactly one hit: hand back the bare string rather than a 1-item list.
    return hits[0]
Beispiel #2
0
def test_muscle():
    """Smoke-test the MUSCLE service end to end.

    Reads the parameter list and the details of the ``format`` parameter,
    fetches two FASTA records from UniProt, submits their concatenation as
    a job, waits for it, then requests the result types and the
    ``phylotree`` result.  Nothing is asserted; the test passes as long as
    no call raises.
    """
    muscle = MUSCLE(verbose=False)
    # Bare attribute access: only checks that reading it does not raise.
    muscle.parameters
    muscle.getParametersDetails("format")

    service = uniprot.UniProt(verbose=False)
    first, second = [service.get_fasta(acc) for acc in ("P18812", "P18813")]

    job = muscle.run(frmt="fasta", sequence=first + second,
                     email="*****@*****.**")
    muscle.getStatus(job)
    muscle.wait(job)

    muscle.getResultTypes(job)
    muscle.getResult(job, 'phylotree')
Beispiel #3
0
def test_muscle():
    """Smoke-test the MUSCLE service end to end.

    Checks that the details of the ``format`` parameter can be read and that
    an unknown parameter name is rejected, then fetches two FASTA records
    from UniProt, submits their concatenation as a job, waits for it, and
    requests the result types and the ``phylotree`` result.

    :raises AssertionError: if ``get_parameter_details`` accepts the invalid
        parameter name ``"formattt"`` without raising.
    """
    m = MUSCLE(verbose=False)
    # Bare attribute access: only checks that reading it does not raise.
    m.parameters
    m.get_parameter_details("format")

    # The failure signal lives in the ``else`` branch so that it cannot be
    # swallowed by the handler: an ``assert False`` placed inside the ``try``
    # would itself be caught and the check could never fail.
    try:
        m.get_parameter_details("formattt")
    except Exception:
        pass
    else:
        raise AssertionError(
            "get_parameter_details accepted an invalid parameter name")

    u = uniprot.UniProt(verbose=False)
    f1 = u.get_fasta("P18812")
    f2 = u.get_fasta("P18813")

    jobid = m.run(frmt="fasta", sequence=f1 + f2, email="*****@*****.**")
    m.get_status(jobid)
    m.wait(jobid)

    m.get_result_types(jobid)
    m.get_result(jobid, 'phylotree')
Beispiel #4
0
def query_uniprot(gene):
    """Return the second line of a UniProt search for *gene*.

    The search is restricted to reviewed entries of organism 9606 and asks
    for the ``id``, ``protein names`` and ``comment(FUNCTION)`` columns.  The
    text response is split on newlines and the element at index 1 is
    returned (presumably the first data row after a header -- TODO confirm).

    :param gene: gene name, concatenated into a ``gene_exact:`` query.
    :return: element 1 of the newline-split response.
    :raises IndexError: if the response contains no newline.
    """
    service = up.UniProt()
    query = "gene_exact:" + gene + "+AND+reviewed:yes+AND+organism:9606"
    response = service.search(
        query, columns="id, protein names, comment(FUNCTION)")
    rows = response.split("\n")
    return rows[1]
Beispiel #5
0
def query_uniprot(gene):
    """Return the second line of a UniProt ``id`` search for *gene*.

    The search is restricted to reviewed entries of organism 9606.  The text
    response is split on newlines and the element at index 1 is returned
    (presumably the first identifier after a header -- TODO confirm).

    :param gene: gene name, concatenated into a ``gene_exact:`` query.
    :return: element 1 of the newline-split response.
    :raises IndexError: if the response contains no newline.
    """
    service = up.UniProt()
    query = "gene_exact:" + gene + "+AND+reviewed:yes+AND+organism:9606"
    rows = service.search(query, columns="id").split("\n")
    return rows[1]
Beispiel #6
0
    def __init__(self,
                 folder,
                 requests_per_sec=10,
                 padj_threshold=0.05,
                 log2_fc_threshold=0,
                 fc_threshold=None,
                 pattern="*complete*.xls"):
        """Load RNADiff results and select the significant genes.

        Three lists of gene identifiers are stored on the instance, each
        ordered by increasing ``padj`` and with any ``"gene:"`` substring
        removed: ``mygenes`` (up or down), ``mygenes_up`` and
        ``mygenes_down``.

        :param folder: forwarded to ``RNADiffResults``.
        :param requests_per_sec: assigned to the ``requests_per_sec``
            attribute of both the QuickGO and the UniProt clients.
        :param padj_threshold: rows whose ``padj`` exceeds this value are
            discarded.
        :param log2_fc_threshold: non-negative cutoff ``t``; a row is kept
            when ``log2FoldChange <= -t`` or ``log2FoldChange >= t``.
        :param fc_threshold: optional cutoff on the linear scale.  When not
            None, its log2 replaces *log2_fc_threshold*.
        :param pattern: forwarded to ``RNADiffResults`` as ``pattern``.
        :raises AssertionError: if *log2_fc_threshold* is negative, or if
            *fc_threshold* yields a negative (or undefined) log2 cutoff.
        """
        assert log2_fc_threshold >= 0, "log2 fc_threshold must be >=0"

        if fc_threshold is not None:
            log2_fc_threshold = pylab.log2(fc_threshold)
            # The effective cutoff must satisfy the same invariant: a linear
            # threshold below 1 would give a negative cutoff, under which the
            # "<= -t or >= t" filter below no longer excludes anything.
            assert log2_fc_threshold >= 0, "log2 fc_threshold must be >=0"

        from bioservices import panther, quickgo, uniprot
        self.panther = panther.Panther()
        self.valid_taxons = [
            x['taxon_id'] for x in self.panther.get_supported_genomes()
        ]

        self.quickgo = quickgo.QuickGO(cache=True)
        self.uniprot = uniprot.UniProt(cache=True)

        self.quickgo.requests_per_sec = requests_per_sec
        self.uniprot.requests_per_sec = requests_per_sec

        self.ancestors = {
            "MF": "GO:0003674",
            "CC": "GO:0005575",
            "BP": "GO:0008150"
        }
        self.aspects = {"MF": "molecular_function"}
        self.ontologies = [
            'GO:0003674', 'GO:0008150', 'GO:0005575',
            'ANNOT_TYPE_ID_PANTHER_GO_SLIM_MF',
            'ANNOT_TYPE_ID_PANTHER_GO_SLIM_BP',
            'ANNOT_TYPE_ID_PANTHER_GO_SLIM_CC', 'ANNOT_TYPE_ID_PANTHER_PC',
            'ANNOT_TYPE_ID_PANTHER_PATHWAY', 'ANNOT_TYPE_ID_REACTOME_PATHWAY'
        ]

        self.ontology_aliases = [
            "MF", "BP", "CC", 'SLIM_MF', 'SLIM_BP', 'SLIM_CC', 'PROTEIN',
            'PANTHER_PATHWAY', 'REACTOME_PATHWAY'
        ]

        from sequana.rnadiff import RNADiffResults
        self.rnadiff = RNADiffResults(folder, pattern=pattern)
        logger.info(
            "Ignoring pvalue adjusted > {} and fold change in [{}, {}]".format(
                padj_threshold, 1 / (2**log2_fc_threshold),
                2**log2_fc_threshold))

        # The query strings below refer to the locals ``padj_threshold`` and
        # ``fc_threshold`` through the ``@name`` syntax, so both names must
        # exist in this frame; ``fc_threshold`` now holds the log2 cutoff.
        fc_threshold = log2_fc_threshold

        def _ordered_ids(frame):
            # Index labels sorted by padj.  When using ENSEMBL, the prefix
            # "gene:" should be removed to be understood by PantherDB.
            return [
                x.replace("gene:", "")
                for x in frame.sort_values('padj').index
            ]

        self.mygenes = _ordered_ids(self.rnadiff.df.query(
            "padj<=@padj_threshold and (log2FoldChange<=-@fc_threshold or log2FoldChange>=@fc_threshold)"
        ))
        self.mygenes_down = _ordered_ids(self.rnadiff.df.query(
            "padj<=@padj_threshold and log2FoldChange<=-@fc_threshold"))
        self.mygenes_up = _ordered_ids(self.rnadiff.df.query(
            "padj<=@padj_threshold and log2FoldChange>=@fc_threshold"))

        # Argument order follows the placeholders: total, then up, then down.
        logger.info("Kept {} genes ({} up; {} down)".format(
            len(self.mygenes), len(self.mygenes_up), len(self.mygenes_down)))