コード例 #1
0
ファイル: kegg.py プロジェクト: javfg/find_annotations
def search_kegg(accessions):
    """Retrieve KEGG pathway annotations for a collection of accessions.

    Every non-null accession is split on ``":"``; the first field is passed
    to ``KEGG.get_pathway_by_gene`` as the organism and the second as the
    gene (e.g. ``"hsa:1234"``). The pathways returned for all accessions
    are tallied, de-duplicated and sorted by how often they occurred.

    Progress is shown with a ``yaspin`` spinner and a one-line summary is
    printed once the task is done.

    Args:
        accessions: Object exposing ``dropna()`` that yields accession
            strings (presumably a pandas Series -- confirm with caller).

    Returns:
        pandas.DataFrame: Columns ``accession`` (``map<digits>``),
        ``description`` and ``count``, one row per unique pathway, sorted
        by ``count`` descending. An empty DataFrame if nothing was found.
    """
    # Local import: pandas.compat.StringIO is not available in current
    # pandas releases, so the standard-library StringIO is used instead.
    import io

    start_time = datetime.datetime.now()
    digits = re.compile(r"\d+")

    with yaspin(text="Retrieving KEGG annotations...", color="cyan") as sp:
        # One service object is enough for every lookup.
        path = KEGG()
        rows = []
        for accession in accessions.dropna():
            res = accession.split(":")
            if len(res) < 2:
                # Not in "<organism>:<gene>" form; nothing to look up.
                continue
            try:
                for k, val in path.get_pathway_by_gene(res[1], res[0]).items():
                    _id = digits.search(k).group(0)
                    rows.append(f"map{_id}\t\"{val}\"\n")
            except AttributeError:
                # Best effort: raised when a pathway key holds no digits
                # (search() returns None) or when the lookup result has no
                # .items() (presumably None for unknown genes -- confirm).
                # Rows gathered so far for this accession are kept.
                continue

        raw_data = "".join(rows)
        try:
            kegg = pandas.read_csv(io.StringIO(raw_data),
                                   sep="\t",
                                   header=None)
            kegg.columns = ["accession", "description"]

            # Add column of counts.
            kegg["count"] = kegg.groupby("accession")["accession"].transform(
                "count")
            kegg = (kegg.drop_duplicates(subset="accession").sort_values(
                by="count", ascending=False).reset_index(drop=True))
            mssg = f"* Found {sum(kegg['count'])} KEGG pathways from which {len(kegg)} were unique."
        except pandas.errors.EmptyDataError:
            # read_csv raises this when no pathway line was collected.
            kegg = pandas.DataFrame()
            mssg = "* Found 0 KEGG Pathways."

        time_diff = (datetime.datetime.now() - start_time).total_seconds()

        sp.text = f"Retrieving KEGG annotations => Task done in {time_diff} seconds."
        sp.ok("✔")
        print(mssg)
        return kegg
コード例 #2
0
def search_pathway(gene, organism):
    """Look up the pathways of ``gene`` in ``organism`` through KEGG.

    A new ``KEGG`` service object is created for each call and the result
    of its ``get_pathway_by_gene(gene, organism)`` is returned unchanged.
    """
    return KEGG().get_pathway_by_gene(gene, organism)
コード例 #3
0
# Read the gene list from the file named by the first command-line
# argument; identifiers may be separated by tabs or newlines.
with open(sys.argv[1], 'r') as fh:
    data = fh.read()
data = data.replace('\t', '\n')
# Drop every empty entry (blank lines, doubled tabs, trailing newline).
# list.remove('') would only discard the first one and leave the rest to
# be processed as gene identifiers.
inputGenes = [gene for gene in data.split('\n') if gene]

#############################

############## Main part
result = {}

for inputGene in inputGenes:
    print('Processing gene ' + inputGene + ':')
    try:
        pathways = k.get_pathway_by_gene(inputGene, 'hsa')
    except (AttributeError):
        print('Invalid gene identifier')
        continue

    else:
        if pathways:
            for pathway in pathways:
                if pathway != 'hsa01100':
                    print('\tProcessing pathway ' + pathway)
                    rel = k.parse_kgml_pathway(pathway)
                    genes_result = []

                    ####### Part where the functions are called
                    for entry in rel['entries']:
                        if entry['type'] == 'gene':