Python PipelineGeneInfoの例、CGATPipelines.PipelineGeneInfo Pythonの例

コード例 #1

0

ファイルを表示

def AnnotateWithMGI(infile, outfile):
    """
    Annotate mouse gene symbols with MGI phenotypes from mousemine.org.

    The mouse gene symbols (generated earlier using homologene) are read
    from ``infile`` and handed, together with an MGIAnnotation object, to
    PipelineGeneInfo.runall.  ``outfile`` is not referenced in the body.

    Tables:
    ensemblg2mgi$annot - original host ensemblg to mouse phenotype ID
    mgi$details - mouse phenotype ID to mouse phenotype details
    """
    symbols = PipelineGeneInfo.getSymbols(infile)

    # Settings for the annotation object, read from the pipeline PARAMS
    mousemine = PARAMS['homologues_mousemine']
    database = PARAMS['db_name']
    annotator = PipelineGeneInfo.MGIAnnotation(mousemine, database)

    PipelineGeneInfo.runall(annotator, symbols, submit=True)

コード例 #2

0

ファイルを表示

def AnnotateWithMousePathway(infile, outfile):
    """
    Annotate mouse gene symbols with mouse pathways from mousemine.org.

    The mouse gene symbols (generated earlier using homologene) are read
    from ``infile`` and handed, together with a MousePathwayAnnotation
    object, to PipelineGeneInfo.runall.  ``outfile`` is not referenced in
    the body.

    Tables:
    ensemblg2mousepathway$annot - original host ensemblg to
                                  mouse pathway ID
    mousepathway$details - mouse pathway ID to mouse pathway details
    """
    symbols = PipelineGeneInfo.getSymbols(infile)

    # Settings for the annotation object, read from the pipeline PARAMS
    mousemine = PARAMS['homologues_mousemine']
    database = PARAMS['db_name']
    annotator = PipelineGeneInfo.MousePathwayAnnotation(mousemine, database)

    PipelineGeneInfo.runall(annotator, symbols, submit=True)

コード例 #3

0

ファイルを表示

ファイル: pipeline_geneinfo.py プロジェクト: CGATOxford/CGATPipelines

def AnnotateWithMGI(infile, outfile):
    """
    Annotate mouse gene symbols with MGI phenotypes from mousemine.org.

    The mouse gene symbols (generated earlier using homologene) are read
    from ``infile`` and de-duplicated before being handed, together with
    an MGIAnnotation object, to PipelineGeneInfo.runall.  ``outfile`` is
    not referenced in the body.

    Tables:
    ensemblg2mgi$annot - original host ensemblg to mouse phenotype ID
    mgi$details - mouse phenotype ID to mouse phenotype details
    """
    # Passing through a set drops repeated symbols; the resulting list
    # order is whatever the set yields.
    unique_symbols = set(PipelineGeneInfo.getSymbols(infile))
    symbols = list(unique_symbols)

    mousemine = PARAMS['homologues_mousemine']
    database = PARAMS['db_name']
    annotator = PipelineGeneInfo.MGIAnnotation(
        mousemine, database, ohost=PARAMS['entrez_host'])

    PipelineGeneInfo.runall(annotator, symbols, submit=True)

コード例 #4

0

ファイルを表示

ファイル: pipeline_geneinfo.py プロジェクト: CGATOxford/CGATPipelines

def GetAndTranslateAllGenes(outfile):
    '''
    This step is required.
    1. All Entrez gene IDs are downloaded from entrez gene.
    2. Corresponding gene symbols are downloaded from mygene.info
    3. Corresponding ensembl gene, ensembl transcript and ensembl protein
       IDs are downloaded from mygene.info, looked up by gene symbol
    4. These are loaded into the database
    5. The gene symbols read back from 'entrez2symbol_<host>.tsv' are
       written to `outfile`, one per line (presumably 'allgenes.tsv' -
       confirm against the pipeline task definition)

    Tables:
    ensemblg2entrez$geneid - ensemblg to entrez ID
    ensemblg2ensemblt$other - ensemblg to ensembl transcript
    ensemblg2ensemblp$other - ensemblg to ensembl protein
    ensemblg2symbol_xxx$geneid - ensemblg to symbol in species xxx

    outfile: path of the gene list to write.
    '''
    GeneAnnot = PipelineGeneInfo.EntrezGeneAnnotation(
        PARAMS['db_name'], PARAMS['entrez_email'])
    if PARAMS['test'] == 1:
        # In test mode count=100 is passed to the download
        # (presumably capping the number of genes - TODO confirm).
        entrezgenelist = GeneAnnot.download_all(PARAMS['entrez_host'],
                                                count=100)
    else:
        entrezgenelist = GeneAnnot.download_all(PARAMS['entrez_host'])

    # Generate a SymbolAnnotation object
    Sym = PipelineGeneInfo.SymbolAnnotation(PARAMS['my_gene_info_source'],
                                            PARAMS['db_name'],
                                            PARAMS['entrez_host'],
                                            PARAMS['entrez_sciname'])

    # Get Symbol Annotations
    PipelineGeneInfo.runall(Sym, entrezgenelist, ['symbol'],
                            scope='entrezgene', species=PARAMS['entrez_host'],
                            submit=True)

    # The symbol step is expected to have left this table in the working
    # directory; only its symbol column is used from here on.
    genesymbols = list(pd.read_csv("entrez2symbol_%s.tsv" % PARAMS[
        'entrez_host'], sep="\t")['symbol_%s' % PARAMS['entrez_host']])

    # Generate an EnsemblAnnotation object
    Ens = PipelineGeneInfo.EnsemblAnnotation(PARAMS['my_gene_info_source'],
                                             PARAMS['db_name'],
                                             PARAMS['entrez_host'])
    # Get Ensembl annotations
    PipelineGeneInfo.runall(Ens, genesymbols, ['ensembl'], scope="symbol",
                            species=PARAMS['entrez_host'], submit=True)

    # Make output gene list.  The context manager guarantees the handle is
    # closed (and buffered data flushed) even if a write raises.
    with IOTools.openFile(outfile, "w") as outf:
        for gene in genesymbols:
            outf.write("%s\n" % gene)

コード例 #5

0

ファイルを表示

def AnnotateWithHomologene(infile, outfile):
    """
    Annotate the genes listed in ``infile`` (allgenes.tsv) with homologous
    gene symbols.

    The homologues come from either a list of species provided in the
    pipeline.ini or all species available in homologene via mygene.info.
    ``outfile`` is not referenced in the body.

    Tables:
    ensemblg2symbol_xxx$geneid - ensemblg in original species to symbol in xxx
    """
    genes = PipelineGeneInfo.readGeneList(infile)

    # Positional constructor arguments, in the order the class expects
    annotation_args = (
        PARAMS['my_gene_info_source'],
        PARAMS['db_name'],
        PARAMS['my_gene_info_homologene'],
        PARAMS['entrez_host'],
        PARAMS['entrez_email'],
    )
    annotator = PipelineGeneInfo.HomologeneAnnotation(*annotation_args)

    PipelineGeneInfo.runall(annotator, genes, ['homologene'], submit=True)

コード例 #6

0

ファイルを表示

def AnnotateWithPathway(infile, outfile):
    """
    Annotate the genes listed in ``infile`` (allgenes.tsv) with pathway
    details.

    Pathways are taken either from all pathway databases available via
    mygene.info or from those specified in the pipeline.ini.  ``outfile``
    is not referenced in the body.

    Tables:
    ensemblg2xxx$annot - ensemblg to ID in pathway database
    xxx$details - pathway database ID to pathway details
    """
    genes = PipelineGeneInfo.readGeneList(infile)

    # Positional constructor arguments, in the order the class expects
    annotation_args = (
        PARAMS['my_gene_info_source'],
        PARAMS['db_name'],
        PARAMS['my_gene_info_pathway'],
    )
    annotator = PipelineGeneInfo.PathwayAnnotation(*annotation_args)

    PipelineGeneInfo.runall(annotator, genes, ['pathway'], submit=True)

コード例 #7

0

ファイルを表示

ファイル: pipeline_geneinfo.py プロジェクト: CGATOxford/CGATPipelines

def AnnotateWithMousePathway(infile, outfile):
    """
    Annotate mouse gene symbols with mouse pathways from mousemine.org.

    The mouse gene symbols (generated earlier using homologene) are read
    from ``infile`` and de-duplicated before being handed, together with
    a MousePathwayAnnotation object, to PipelineGeneInfo.runall.
    ``outfile`` is not referenced in the body.

    Tables:
    ensemblg2mousepathway$annot - original host ensemblg to
                                  mouse pathway ID
    mousepathway$details - mouse pathway ID to mouse pathway details
    """
    # Passing through a set drops repeated symbols; the resulting list
    # order is whatever the set yields.
    unique_symbols = set(PipelineGeneInfo.getSymbols(infile))
    symbols = list(unique_symbols)

    mousemine = PARAMS['homologues_mousemine']
    database = PARAMS['db_name']
    annotator = PipelineGeneInfo.MousePathwayAnnotation(
        mousemine, database, ohost=PARAMS['entrez_host'])

    PipelineGeneInfo.runall(annotator, symbols, submit=True)

コード例 #8

0

ファイルを表示

def AnnotateWithHPO(infile, outfile):
    """
    Annotate human gene symbols with HPO phenotypes from humanmine.org.

    The human gene symbols (generated earlier using homologene) are read
    from ``infile``.  Two steps then run on the same symbol list: the HPO
    phenotype annotation, followed by the 'hpo' ontology annotation.
    ``outfile`` is not referenced in the body.

    Tables:
    ensemblg2hpo$annot - original host ensemblg to human phenotype ID
    hpo$details - human phenotype ID to human phenotype details
    """
    symbols = PipelineGeneInfo.getSymbols(infile)

    # Step 1: phenotype annotation
    humanmine = PARAMS['homologues_humanmine']
    database = PARAMS['db_name']
    hpo_annotator = PipelineGeneInfo.HPOAnnotation(humanmine, database)
    PipelineGeneInfo.runall(hpo_annotator, symbols, submit=True)

    # Step 2: ontology annotation
    ontology_args = ('hpo', PARAMS['homologues_hpoont'], PARAMS['db_name'])
    ontology = PipelineGeneInfo.OntologyAnnotation(*ontology_args)
    # NOTE(review): this step calls the object's own runall() rather than
    # PipelineGeneInfo.runall() as in step 1 - confirm this is intended.
    ontology.runall(symbols)

コード例 #9

0

ファイルを表示

ファイル: pipeline_geneinfo.py プロジェクト: CGATOxford/CGATPipelines

def AnnotateWithHomologene(infile, outfile):
    """
    Annotate the genes listed in ``infile`` (allgenes.tsv) with homologous
    gene symbols.

    The homologues come from either a list of species provided in the
    pipeline.ini or all species available in homologene via mygene.info.
    ``outfile`` is not referenced in the body.

    Tables:
    ensemblg2symbol_xxx$geneid - ensemblg in original species to symbol in xxx
    """
    genes = PipelineGeneInfo.readGeneList(infile)

    # Positional constructor arguments, in the order the class expects
    annotation_args = (
        PARAMS['my_gene_info_source'],
        PARAMS['db_name'],
        PARAMS['my_gene_info_homologene'],
        PARAMS['entrez_host'],
        PARAMS['entrez_email'],
    )
    annotator = PipelineGeneInfo.HomologeneAnnotation(*annotation_args)

    PipelineGeneInfo.runall(
        annotator, genes, ['homologene'],
        species=PARAMS['entrez_host'], submit=True)

コード例 #10

0

ファイルを表示

ファイル: pipeline_geneinfo.py プロジェクト: CGATOxford/CGATPipelines

def AnnotateWithPathway(infile, outfile):
    """
    Annotate the genes listed in ``infile`` (allgenes.tsv) with pathway
    details.

    Pathways are taken either from all pathway databases available via
    mygene.info or from those specified in the pipeline.ini.  ``outfile``
    is not referenced in the body.

    Tables:
    ensemblg2xxx$annot - ensemblg to ID in pathway database
    xxx$details - pathway database ID to pathway details
    """
    genes = PipelineGeneInfo.readGeneList(infile)

    # Positional constructor arguments, in the order the class expects
    annotation_args = (
        PARAMS['my_gene_info_source'],
        PARAMS['db_name'],
        PARAMS['my_gene_info_pathway'],
        PARAMS['entrez_host'],
    )
    annotator = PipelineGeneInfo.PathwayAnnotation(*annotation_args)

    PipelineGeneInfo.runall(
        annotator, genes, ['pathway'],
        species=PARAMS['entrez_host'], submit=True)

コード例 #11

0

ファイルを表示

ファイル: pipeline_geneinfo.py プロジェクト: logust79/cgat-flow

def AnnotateWithGO(infile, outfile):
    """
    Annotate the genes listed in ``infile`` (allgenes.tsv) with GO
    ontology terms using information from mygene.info.

    Two steps run on the same gene list: the GO term annotation, followed
    by the 'go' ontology annotation.  ``outfile`` is not referenced in
    the body.

    Tables:
    ensemblg2go$annot - ensemblg to go ID
    go$details - go ID to details of go term
    go$ont - go ID to parent go IDs
    """
    genes = PipelineGeneInfo.readGeneList(infile)

    # Step 1: GoAnnotation object with details from mygene.info
    go_args = (
        PARAMS['my_gene_info_source'],
        PARAMS['db_name'],
        PARAMS['my_gene_info_go'],
        PARAMS['entrez_host'],
    )
    go_annotator = PipelineGeneInfo.GoAnnotation(*go_args)
    PipelineGeneInfo.runall(
        go_annotator, genes, ['go'],
        species=PARAMS['entrez_host'], submit=True)

    # Step 2: the GO hierarchical ontology from OBO foundry
    ontology_args = ('go', PARAMS['my_gene_info_goont'], PARAMS['db_name'])
    ontology = PipelineGeneInfo.OntologyAnnotation(*ontology_args)
    PipelineGeneInfo.runall(
        ontology, genes,
        species=PARAMS['entrez_host'], submit=True)

コード例 #12

0

ファイルを表示

ファイル: pipeline_geneinfo.py プロジェクト: CGATOxford/CGATPipelines

def AnnotateWithHPO(infile, outfile):
    """
    Annotate human gene symbols with HPO phenotypes from humanmine.org.

    The human gene symbols (generated earlier using homologene) are read
    from ``infile`` and de-duplicated.  Two steps then run on the same
    symbol list: the HPO phenotype annotation, followed by the 'hpo'
    ontology annotation.  ``outfile`` is not referenced in the body.

    Tables:
    ensemblg2hpo$annot - original host ensemblg to human phenotype ID
    hpo$details - human phenotype ID to human phenotype details
    """
    # Passing through a set drops repeated symbols; the resulting list
    # order is whatever the set yields.
    unique_symbols = set(PipelineGeneInfo.getSymbols(infile))
    symbols = list(unique_symbols)

    # Step 1: phenotype annotation
    hpo_args = (
        PARAMS['homologues_humanmine'],
        PARAMS['db_name'],
        PARAMS['entrez_host'],
    )
    hpo_annotator = PipelineGeneInfo.HPOAnnotation(*hpo_args)
    PipelineGeneInfo.runall(hpo_annotator, symbols, submit=True)

    # Step 2: ontology annotation
    ontology_args = ('hpo', PARAMS['homologues_hpoont'], PARAMS['db_name'])
    ontology = PipelineGeneInfo.OntologyAnnotation(*ontology_args)
    PipelineGeneInfo.runall(
        ontology, symbols,
        species=PARAMS['entrez_host'], submit=True)

コード例 #13

0

ファイルを表示

ファイル: pipeline_geneinfo.py プロジェクト: logust79/cgat-flow

def GetAndTranslateAllGenes(outfile):
    '''
    This step is required.
    1. All Entrez gene IDs are downloaded from entrez gene.
    2. Corresponding gene symbols are downloaded from mygene.info
    3. Corresponding ensembl gene, ensembl transcript and ensembl protein
       IDs are downloaded from mygene.info, looked up by gene symbol
    4. These are loaded into the database
    5. The gene symbols read back from 'entrez2symbol_<host>.tsv' are
       written to `outfile`, one per line (presumably 'allgenes.tsv' -
       confirm against the pipeline task definition)

    Tables:
    ensemblg2entrez$geneid - ensemblg to entrez ID
    ensemblg2ensemblt$other - ensemblg to ensembl transcript
    ensemblg2ensemblp$other - ensemblg to ensembl protein
    ensemblg2symbol_xxx$geneid - ensemblg to symbol in species xxx

    outfile: path of the gene list to write.
    '''
    GeneAnnot = PipelineGeneInfo.EntrezGeneAnnotation(PARAMS['db_name'],
                                                      PARAMS['entrez_email'])
    if PARAMS['test'] == 1:
        # In test mode count=100 is passed to the download
        # (presumably capping the number of genes - TODO confirm).
        entrezgenelist = GeneAnnot.download_all(PARAMS['entrez_host'],
                                                count=100)
    else:
        entrezgenelist = GeneAnnot.download_all(PARAMS['entrez_host'])

    # Generate a SymbolAnnotation object
    Sym = PipelineGeneInfo.SymbolAnnotation(PARAMS['my_gene_info_source'],
                                            PARAMS['db_name'],
                                            PARAMS['entrez_host'],
                                            PARAMS['entrez_sciname'])

    # Get Symbol Annotations
    PipelineGeneInfo.runall(Sym,
                            entrezgenelist, ['symbol'],
                            scope='entrezgene',
                            species=PARAMS['entrez_host'],
                            submit=True)

    # The symbol step is expected to have left this table in the working
    # directory; only its symbol column is used from here on.
    genesymbols = list(
        pd.read_csv("entrez2symbol_%s.tsv" % PARAMS['entrez_host'],
                    sep="\t")['symbol_%s' % PARAMS['entrez_host']])

    # Generate an EnsemblAnnotation object
    Ens = PipelineGeneInfo.EnsemblAnnotation(PARAMS['my_gene_info_source'],
                                             PARAMS['db_name'],
                                             PARAMS['entrez_host'])
    # Get Ensembl annotations
    PipelineGeneInfo.runall(Ens,
                            genesymbols, ['ensembl'],
                            scope="symbol",
                            species=PARAMS['entrez_host'],
                            submit=True)

    # Make output gene list.  The context manager guarantees the handle is
    # closed (and buffered data flushed) even if a write raises.
    with IOTools.open_file(outfile, "w") as outf:
        for gene in genesymbols:
            outf.write("%s\n" % gene)

コード例 #14

0

ファイルを表示

ファイル: pipeline_geneinfo.py プロジェクト: logust79/cgat-flow

def MakeSubDBs(infile, outfile):
    """
    Build a per-geneset database holding only the annotations for the
    genes in a user-supplied list.

    Takes any lists of genes provided in genesets.dir and makes a
    database in genesetdbs.dir restricted to those genes.  The databases
    keep the same gene ID type as the input lists, so users can quickly
    see the annotations for their genes of interest.

    The work is delegated to PipelineGeneInfo.MakeSubDBs, which receives
    ``infile``, ``outfile`` and the 'db_subsettype' and 'db_name'
    settings from PARAMS.
    """
    subset_type = PARAMS['db_subsettype']
    database = PARAMS['db_name']
    PipelineGeneInfo.MakeSubDBs(
        infile, outfile, subset_type, database, submit=True)

コード例 #15

0

ファイルを表示

ファイル: pipeline_geneinfo.py プロジェクト: CGATOxford/CGATPipelines

def AnnotateWithGO(infile, outfile):
    """
    Annotate the genes listed in ``infile`` (allgenes.tsv) with GO
    ontology terms using information from mygene.info.

    Two steps run on the same gene list: the GO term annotation, followed
    by the 'go' ontology annotation.  ``outfile`` is not referenced in
    the body.

    Tables:
    ensemblg2go$annot - ensemblg to go ID
    go$details - go ID to details of go term
    go$ont - go ID to parent go IDs
    """
    genes = PipelineGeneInfo.readGeneList(infile)

    # Step 1: GoAnnotation object with details from mygene.info
    go_args = (
        PARAMS['my_gene_info_source'],
        PARAMS['db_name'],
        PARAMS['my_gene_info_go'],
        PARAMS['entrez_host'],
    )
    go_annotator = PipelineGeneInfo.GoAnnotation(*go_args)
    PipelineGeneInfo.runall(
        go_annotator, genes, ['go'],
        species=PARAMS['entrez_host'], submit=True)

    # Step 2: the GO hierarchical ontology from OBO foundry
    ontology_args = ('go', PARAMS['my_gene_info_goont'], PARAMS['db_name'])
    ontology = PipelineGeneInfo.OntologyAnnotation(*ontology_args)
    PipelineGeneInfo.runall(
        ontology, genes,
        species=PARAMS['entrez_host'], submit=True)