Example #1
0
    sys.exit()

# Optional identifier map: only built when an id file was given on the
# command line, otherwise left as None.
id_name = IDMap(args.idfile) if args.idfile is not None else None

# Ontology object built from the OBO path given on the command line.
gene_ontology = OBO(args.obo)

# Attach gene annotations to the ontology from whichever source was supplied.
# An association file (args.ass) takes precedence over a GMT file (args.gmt).
logger.info('Populating gene associations')
if args.ass:
    # Association file: gene and term columns are chosen by the caller.
    gene_ontology.populate_annotations(args.ass,
                                       gene_col=args.gcol,
                                       term_col=args.term_col)
elif args.gmt:
    gmt = GMT(args.gmt)
    gene_ontology.populate_annotations_from_gmt(gmt)
else:
    # No annotation source at all. Report on stderr (newline-terminated so
    # the shell prompt is not glued to the message) and stop with a non-zero
    # status. sys.exit is used rather than the interactive `exit()` helper,
    # which is injected by the `site` module and is not guaranteed to exist.
    sys.stderr.write(
        "--Provide gene annotations from an association file or a GMT file\n")
    sys.exit(1)

if args.pub_filter:
    # Group (term, annotation) pairs by the reference each annotation cites,
    # looking only at terms in the requested namespace.
    pub_counts = defaultdict(set)
    for (term_id, term) in gene_ontology.go_terms.iteritems():
        if term.namespace != args.nspace:
            continue
        for a in term.annotations:
            pub_counts[a.ref].add((term, a))
    # Report every reference that contributes more than 50 pairs.
    # NOTE(review): only the log call is visible in this section; the code
    # that actually removes the annotations is not shown here.
    for (ref, annots) in pub_counts.iteritems():
        if len(annots) > 50:
            # Argument order must follow the format string: count (%i) first,
            # then the reference (%s).
            logger.info('Removing %i annotations from: %s', len(annots), ref)
Example #2
0
# Set of ubiquitous gene identifiers. It is always bound (empty when no file
# was given) because the edge loop further down tests `ubiq` unconditionally;
# leaving it undefined would raise NameError there.
ubiq = set()
if args.ubiq_genes:
    with open(args.ubiq_genes) as f:
        # Each line, stripped of surrounding whitespace, becomes one entry.
        ubiq.update(line.strip() for line in f)

    logger.info('Total ubiquitous genes: %i', len(ubiq))

# Optional tissue ontology: only constructed when a path was supplied.
onto = OBO(args.tissue_onto) if args.tissue_onto else None

# Gene sets as read from the GMT file.
tissue_genes = GMT(args.tissue_genes)

if onto:
    # Load the gene sets into the ontology, propagate, and continue with the
    # ontology's own GMT export in place of the raw gene sets.
    onto.populate_annotations_from_gmt(tissue_genes)
    onto.propagate()
    tissue_genes = onto.as_gmt()

# Two-level mapping; a missing outer key starts out as an empty dict.
tissue_std_edges = defaultdict(dict)

# Read the whole edge file into memory, one list entry per line.
with open(args.pos) as edge_file:
    edge_lines = list(edge_file)

# Walk the edge lines read above. Each line is split on whitespace and must
# have at least three fields; only the first three are used.
# NOTE(review): the loop body continues past the end of this section, so only
# the visible steps are described here.
for line in edge_lines:
    g1, g2, std = line.strip().split()[:3]
    # Unordered pair of endpoints; a self-edge (g1 == g2) collapses to a
    # single-element set and therefore can never satisfy the == 2 test below.
    edge = frozenset([g1, g2])

    # Skip edges where both genes are ubiquitous
    if ubiq and len(edge & ubiq) == 2:
        continue