def __init__(self):
    """Populate this object's settings from the ``args`` object in scope.

    ``args`` is not a parameter; it is read as a free name (presumably the
    parsed command-line namespace -- confirm against the caller).
    """
    opts = args

    # Test selection and the three input collections, built in this order.
    self.test_name = opts.test_name
    self.sample_sets = GMT(opts.gene_sets)
    self.anno_list = GMT(opts.annotation_list)
    self.background = BACKGROUND([], opts.background_list)

    # Options copied through unchanged: (attribute on self, field on args).
    passthrough = (
        ("alpha", "rate"),
        ("output", "output"),
        ("cpu_count", "cpu"),
        ("precision", "precision"),
    )
    for attr, field in passthrough:
        setattr(self, attr, getattr(opts, field))

    # Flags that are passed through str2bool before being stored.
    for flag in ("console", "significant", "table"):
        setattr(self, flag, str2bool(getattr(opts, flag)))
def test_enrichment_score(self):
    """Check enrichment_score for gene set 'GO:0070507' against a stored value."""
    fixture_dir = os.path.join("files", "unittest_files")

    gmt = GMT(os.path.join(fixture_dir, "GO_shortened.gmt"))
    anno = gmt.genesets['GO:0070507']
    expr_list = MAT(os.path.join(fixture_dir, "CLUSTERS.mat"))

    score = enrichment_score(anno, 0, expr_list, 1)

    self.assertAlmostEqual(float(score), 0.629079429538, delta=0.0001)
def test_page(self):
    """Check the first two p-values of page()'s first result entry."""
    fixture_dir = os.path.join("files", "unittest_files")
    anno = GMT(os.path.join(fixture_dir, "GO_shortened.gmt"))
    expr_list = MAT(os.path.join(fixture_dir, "CLUSTERS.mat"))

    page_result = page(expr_list, 0, anno, 1, cpu_count())

    # NOTE(review): both expected values are many orders of magnitude
    # smaller than delta=0.0001, so any p-value within 1e-4 of zero passes.
    expected_p_values = (6.35801240607e-17, 1.1116067475e-10)
    for position, expected in enumerate(expected_p_values):
        observed = float(page_result[0][position].p_value)
        self.assertAlmostEqual(observed, expected, delta=0.0001)
def test_wilcoxon(self):
    """Check the first two p-values of wilcoxon()'s first result entry."""
    fixture_dir = os.path.join("files", "unittest_files")
    anno = GMT(os.path.join(fixture_dir, "GO_shortened.gmt"))
    expr_list = MAT(os.path.join(fixture_dir, "CLUSTERS.mat"))

    wilcoxon_result = wilcoxon(expr_list, 0, anno, 1, cpu_count())

    # NOTE(review): the first expected value (~1.1e-10) is far below
    # delta=0.0001, so that comparison only checks "within 1e-4 of zero".
    expected_p_values = (1.1206994619e-10, 0.0026701906744509285)
    for position, expected in enumerate(expected_p_values):
        observed = float(wilcoxon_result[0][position].p_value)
        self.assertAlmostEqual(observed, expected, delta=0.0001)
def test_gsea(self):
    """Check the first two ``es`` values of gsea()'s first result entry."""
    fixture_dir = os.path.join("files", "unittest_files")
    anno = GMT(os.path.join(fixture_dir, "GO_shortened.gmt"))
    expr_list = MAT(os.path.join(fixture_dir, "CLUSTERS.mat"))

    cluster, permutations = 0, 5
    gsea_result = gsea(
        expr_list, cluster, anno, permutations, 1.0, 1, cpu_count()
    )

    expected_scores = (0.629079429538, 0.458915389493)
    for position, expected in enumerate(expected_scores):
        observed = float(gsea_result[0][position].es)
        self.assertAlmostEqual(observed, expected, delta=0.0001)
def test_generate_inputs(self):
    """Check the fields of the first item returned by generate_inputs()."""
    fixture_dir = os.path.join("files", "unittest_files")
    anno = GMT(os.path.join(fixture_dir, "test_go.gmt"))
    expr_list = MAT(os.path.join(fixture_dir, "CLUSTERS.mat"))

    # The strings '0' through '39', compared against the item's anno_list.
    expected_members = {str(number) for number in range(40)}

    first = generate_inputs(anno, 0, expr_list, 5)[0]

    self.assertEqual(first.anno_id, '0')
    self.assertEqual(first.anno_list, expected_members)
    self.assertEqual(first.expr_cluster, 0)
    self.assertEqual(first.expr_list, expr_list)
    self.assertEqual(first.permutations, 5)
def __init__(self):
    """Populate this object's settings from the ``args`` object in scope.

    ``args`` is not a parameter; it is read as a free name (presumably the
    parsed command-line namespace -- confirm against the caller).
    """
    opts = args

    # Test selection and the two input collections, built in this order.
    self.test_name = opts.test_name
    self.anno_list = GMT(opts.annotation_list)
    self.expr_list = MAT(opts.expr_list)
    # NOTE: calls to expr_list.DAVID_to_gene_symbol(...) and
    # expr_list.normalize(args.cluster_number) were commented out at this
    # point in the original and remain disabled.

    # Options copied through unchanged: (attribute on self, field on args).
    passthrough = (
        ("expr_clusters", "cluster_array"),
        ("permutations", "permutations"),
        ("alpha", "rate"),
        ("output", "output"),
        ("weight", "weight"),
        ("cpu_count", "cpu"),
        ("precision", "precision"),
    )
    for attr, field in passthrough:
        setattr(self, attr, getattr(opts, field))

    # Flags that are passed through str2bool before being stored.
    for flag in ("console", "significant", "table"):
        setattr(self, flag, str2bool(getattr(opts, flag)))
nargs=1, help='Ontology to use for propagation') args = parser.parse_args() MIN_POS, MAX_POS = 5, 500 if args.ontology == 'DO': onto = DiseaseOntology.generate() elif args.ontology == 'GO': onto = GeneOntology.generate() else: onto = Ontology.generate() if args.gmt: # Load GMT genes onto Disease Ontology and propagate gmt = GMT(filename=args.gmt) onto.populate_annotations_from_gmt(gmt) onto.propagate() # Filter terms by number of gene annotations terms = [ term.go_id for term in onto.get_termobject_list() if len(term.annotations) >= MIN_POS and len(term.annotations) <= MAX_POS ] if args.slim: # Build ontology aware labels lines = open(args.slim).readlines() slim_terms = set([l.strip() for l in lines]) labels = OntoLabels(obo=onto, slim_terms=slim_terms) else:
"--When filtering by publication, must provide GO namespace.\n") sys.exit() id_name = None if args.idfile is not None: id_name = IDMap(args.idfile) gene_ontology = OBO(args.obo) logger.info('Populating gene associations') if args.ass: gene_ontology.populate_annotations(args.ass, gene_col=args.gcol, term_col=args.term_col) elif args.gmt: gmt = GMT(args.gmt) gene_ontology.populate_annotations_from_gmt(gmt) else: sys.stderr.write( "--Provide gene annotations from an association file or a GMT file") exit() if args.pub_filter: pub_counts = defaultdict(set) for (term_id, term) in gene_ontology.go_terms.iteritems(): if term.namespace != args.nspace: continue for a in term.annotations: pub_counts[a.ref].add((term, a)) for (ref, annots) in pub_counts.iteritems(): if len(annots) > 50:
args = parser.parse_args() ubiq = None if args.ubiq_genes: ubiq = set() with open(args.ubiq_genes) as f: for l in f.readlines(): ubiq.add(l.strip()) logger.info('Total ubiquitous genes: %i', len(ubiq)) onto = None if args.tissue_onto: onto = OBO(args.tissue_onto) tissue_genes = GMT(args.tissue_genes) if onto: onto.populate_annotations_from_gmt(tissue_genes) onto.propagate() tissue_genes = onto.as_gmt() with open(args.pos) as f: edge_lines = f.readlines() tissue_std_edges = defaultdict(dict) for line in edge_lines: g1, g2, std = line.strip().split()[:3] edge = frozenset([g1, g2])