Beispiel #1
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-

#####################################################
# Build the binary GO-to-gene association matrix.   #
# The matrix looks like this:
#   genes...(cols)
# G
# O
# .
# .
# (GO IDs in the rows)
#####################################################

import sys

# d_go dictionary from amigo
from d_go import d_go

# Collect every unique gene that appears under at least one GO id.
all_genes = set(gene for genes in d_go.values() for gene in genes)

# Header row: one column per gene, followed by the GO_ID column.
# The label is joined together with the gene names so the header has the
# same number of tab-separated fields as the data rows, even when there are
# no genes at all (no stray leading tab).
sys.stdout.write("\t".join(list(all_genes) + ["GO_ID"]) + "\n")

# One row per GO id: a 0/1 flag for each gene, then the GO id itself.
# all_genes is not modified between the header and the rows, so iterating it
# again yields the genes in the same order as the header columns.
for go_id in d_go:
    # Build the membership set once per row so that every cell is a constant
    # time lookup instead of a scan of this GO id's gene collection.
    members = set(d_go[go_id])
    cells = ["%d" % (gene in members) for gene in all_genes]
    cells.append("%s" % (go_id))
    # Emit the whole row with a single write rather than one write per cell.
    sys.stdout.write("\t".join(cells) + "\n")
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import sys

from d_go import d_go
from d_uniqueColor50rule import d_uniqueColor50rule

from collections import Counter

# Unique genes associated with at least one GO id.
uniqueGene = set(gene for genelist in d_go.values() for gene in genelist)

# Color label of every unique gene; genes that have no entry in
# d_uniqueColor50rule are labelled "NA".
color_list = [d_uniqueColor50rule.get(gene, "NA") for gene in uniqueGene]

# Number of unique genes carrying each color label.
# A Counter tallies the labels in a single pass and yields 0 for a label that
# never occurs, so the report below no longer fails with KeyError when, for
# example, every gene has a color and there is no "NA" entry.
color_count = Counter(color_list)

# Output columns, tab-separated: Red, Pink, Yellow, Black, NA.
sys.stdout.write("%d\t%d\t%d\t%d\t%d\n" % (color_count['Red'],
                                           color_count['Pink'],
                                           color_count['Yellow'],
                                           color_count['Black'],
                                           color_count['NA']))