예제 #1
0
    def setUp(self):
        """Build the fixtures shared by the tests.

        Loads the ontology from 'files/test_data/go.obo', populates its
        annotations from GO_URL (passed with remote_location=True), looks up
        the term for DSREPAIR_ID, and reads the slim terms from
        'files/go_neg_slim.txt' (one whitespace-stripped entry per line).
        """
        self.go = OBO('files/test_data/go.obo')
        self.go.populate_annotations(GO_URL, remote_location=True)

        self.dsrepair_term = self.go.get_term(DSREPAIR_ID)

        # A context manager guarantees the file handle is closed; the
        # previous bare open(...).readlines() left it to the garbage collector.
        with open('files/go_neg_slim.txt') as slim_file:
            self.slim_terms = {line.strip() for line in slim_file}
예제 #2
0
class TestLabels(unittest.TestCase):
    """Tests for the positive/negative gene labels returned by OntoLabels."""

    def setUp(self):
        """Build the fixtures shared by the tests.

        Loads the ontology from 'files/test_data/go.obo', populates its
        annotations from GO_URL (passed with remote_location=True), looks up
        the term for DSREPAIR_ID, and reads the slim terms from
        'files/go_neg_slim.txt' (one whitespace-stripped entry per line).
        """
        self.go = OBO('files/test_data/go.obo')
        self.go.populate_annotations(GO_URL, remote_location=True)

        self.dsrepair_term = self.go.get_term(DSREPAIR_ID)

        # A context manager guarantees the file handle is closed; the
        # previous bare open(...).readlines() left it to the garbage collector.
        with open('files/go_neg_slim.txt') as slim_file:
            self.slim_terms = {line.strip() for line in slim_file}

    def testOntoLabelsNeg(self):
        """Check the labels returned by get_labels(DSREPAIR_ID).

        Positives must be non-empty and equal the genes annotated to the
        DSREPAIR_ID term. Negatives must not contain any gene annotated to
        DNA_REP_ID, nor any gene annotated to a descendant of DNA_METAB,
        which is asserted to be a slim term that is an ancestor of both
        DNA_REP_ID and DSREPAIR_ID.
        """
        ol = OntoLabels(obo=self.go, slim_terms=self.slim_terms)

        pos, neg = ol.get_labels(DSREPAIR_ID)
        self.assertGreater(len(pos), 0)
        self.assertEqual(pos, set(self.dsrepair_term.get_annotated_genes()))

        similar_term = self.go.get_term(DNA_REP_ID).get_annotated_genes()

        # Slim terms that are ancestors of both terms.
        slim_overlap = (self.go.get_ancestors(DNA_REP_ID)
                        & self.go.get_ancestors(DSREPAIR_ID)
                        & self.slim_terms)

        self.assertIn(DNA_METAB, slim_overlap)
        self.assertEqual(len(neg & set(similar_term)), 0)

        for dterm in self.go.get_descendents(DNA_METAB):
            dgenes = set(self.go.get_term(dterm).get_annotated_genes())
            self.assertEqual(len(neg & dgenes), 0)
예제 #3
0
class TestOBO(unittest.TestCase):
    """Tests for loading, annotating and querying an OBO ontology."""

    def setUp(self):
        """Load the local test ontology and look up the DSREPAIR_ID term."""
        self.go = OBO('files/test_data/go.obo')
        self.dsrepair_term = self.go.get_term(DSREPAIR_ID)

    def test_load_remote(self):
        """Test loading an obo file from a URL"""
        self.go = OBO()
        self.go.load_obo(GO_URL, remote_location=True)
        # Re-run the local-load checks against the remotely loaded ontology.
        self.test_load()

    def test_load(self):
        """Test an obo is loaded"""
        # Check that the root biological process term is loaded
        term = self.go.get_term('GO:0008150')
        self.assertIsNotNone(term)
        self.assertEqual(term.name, 'biological_process')
        self.assertGreater(len(term.parent_of), 0)

    def test_add_annotation(self):
        """Test that adding a gene annotation is correct"""
        self.assertIsNotNone(self.dsrepair_term)

        self.dsrepair_term.add_annotation('672')
        genes = self.dsrepair_term.get_annotated_genes()
        self.assertEqual(genes[0], '672')

    def test_propagation(self):
        """Test that gene propagation adds annotations to the correct terms"""
        self.assertIsNotNone(self.dsrepair_term)

        self.dsrepair_term.add_annotation('672')
        self.go.propagate()
        term_count = sum(len(term.get_annotated_genes())
                         for term in self.go.get_termobject_list())

        # Expected total for the bundled test ontology; presumably the term
        # itself plus the 25 ancestors checked in test_ancestors.
        self.assertEqual(term_count, 26)

    def test_direct_annotation(self):
        """Test that direct annotation status is preserved"""
        self.assertIsNotNone(self.dsrepair_term)

        self.go.add_annotation(go_id=DSREPAIR_ID,
                               gid='672',
                               ref=None,
                               direct=True)
        self.go.propagate()

        direct_count, total = 0, 0
        for term in self.go.get_termobject_list():
            for annotation in term.annotations:
                if annotation.direct:
                    direct_count += 1
                    # Only the originally annotated term may stay direct.
                    self.assertEqual(term, self.dsrepair_term)
                total += 1
        self.assertEqual(direct_count, 1)
        self.assertEqual(total, 26)

    def test_ancestors(self):
        """Test that ancestor terms are correct"""
        self.assertIsNotNone(self.dsrepair_term)
        ancestors = self.go.get_ancestors(self.dsrepair_term.go_id)
        self.assertEqual(len(ancestors), 25)

    def test_parents(self):
        """Test that parent terms are stored correctly"""
        parents = self.dsrepair_term.child_of
        self.assertEqual(len(parents), 2)
        self.assertIn(GOTerm('GO:0006302'), parents)
        self.assertIn(GOTerm('GO:0000725'), parents)

    def test_heads(self):
        """Test that there are only three head nodes, corresponding to:
        biological process, molecular function, and cellular component
        """
        heads = {term for term in self.go.get_termobject_list() if term.head}
        self.assertEqual(len(heads), 3)

        self.assertIn(GOTerm('GO:0008150'), heads)
        self.assertIn(GOTerm('GO:0003674'), heads)
        self.assertIn(GOTerm('GO:0005575'), heads)

    def test_obsolete(self):
        """Test that none of the stored terms are obsolete"""
        for term in self.go.get_termobject_list():
            self.assertFalse(term.obsolete)

        for term in self.go.get_obsolete_terms():
            self.assertTrue(term.obsolete)

    def test_term_equals(self):
        """Test GOTerm equals method"""
        self.assertEqual(self.dsrepair_term, GOTerm(DSREPAIR_ID))

    def tearDown(self):
        """Drop references to the fixtures built in setUp."""
        self.go = None
        self.dsrepair_term = None
예제 #4
0
 def test_load_remote(self):
     """Load an obo file from GO_URL, then run the test_load checks on it."""
     # Replace the fixture with an empty ontology before loading remotely.
     ontology = self.go = OBO()
     ontology.load_obo(GO_URL, remote_location=True)
     self.test_load()
예제 #5
0
 def setUp(self):
     """Load the local test ontology and look up the DSREPAIR_ID term."""
     ontology = OBO('files/test_data/go.obo')
     self.go = ontology
     self.dsrepair_term = ontology.get_term(DSREPAIR_ID)
예제 #6
0
args = parser.parse_args()

# Validate option combinations up front. Each failure exits with a non-zero
# status so that shells and pipelines can detect it (a bare sys.exit()
# reports success).
if args.obo is None:
    sys.stderr.write("--obo file is required.\n")
    sys.exit(1)
if args.pub_filter and args.nspace is None:
    sys.stderr.write(
        "--When filtering by publication, must provide GO namespace.\n")
    sys.exit(1)

# Optional ID mapping, built only when an id file was supplied.
id_name = None
if args.idfile is not None:
    id_name = IDMap(args.idfile)

gene_ontology = OBO(args.obo)

# Annotations come from an association file if given, otherwise from a GMT
# file; having neither is an error.
logger.info('Populating gene associations')
if args.ass:
    gene_ontology.populate_annotations(args.ass,
                                       gene_col=args.gcol,
                                       term_col=args.term_col)
elif args.gmt:
    gmt = GMT(args.gmt)
    gene_ontology.populate_annotations_from_gmt(gmt)
else:
    sys.stderr.write(
        "--Provide gene annotations from an association file or a GMT file\n")
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist (e.g. when Python runs with -S).
    sys.exit(1)

if args.pub_filter:
예제 #7
0
                    help='List of background genes')

args = parser.parse_args()

# Optional set of ubiquitous genes: one whitespace-stripped entry per line
# of the file named by --ubiq_genes; stays None when no file is given.
ubiq = None
if args.ubiq_genes:
    ubiq = set()
    with open(args.ubiq_genes) as f:
        ubiq.update(entry.strip() for entry in f)

    logger.info('Total ubiquitous genes: %i', len(ubiq))

# The tissue ontology is built only when one was supplied.
onto = OBO(args.tissue_onto) if args.tissue_onto else None

tissue_genes = GMT(args.tissue_genes)

# With an ontology available, load the gene sets into it, propagate, and
# use the ontology's GMT export in place of the raw gene sets.
if onto:
    onto.populate_annotations_from_gmt(tissue_genes)
    onto.propagate()
    tissue_genes = onto.as_gmt()

# Read every line of the edge file into memory.
with open(args.pos) as f:
    edge_lines = list(f)

tissue_std_edges = defaultdict(dict)

for line in edge_lines:
    g1, g2, std = line.strip().split()[:3]