def test_match_multiple(self):
    """Scan chr1:0-5022 with the CTCF motif (pssm and pssm_rc) and check the two reported hits."""
    ctcf_set = MotifSet(preload_motifs="default").filter(
        {'database': ["jaspar_vertebrates"], 'name': ["MA0139.1.CTCF"]}, search="inexact")
    self.assertEqual(len(ctcf_set), 1)
    motif = ctcf_set.get_motif_list(1, 0.0001)[0]

    scanner = scan.Scanner(7)
    # the same threshold is used for both matrices of the motif
    cutoffs = [motif.threshold, motif.threshold]
    matrices = [motif.pssm, motif.pssm_rc]
    background = tools.flat_bg(4)
    scanner.set_motifs(matrices, background, cutoffs)

    region = GenomicRegion("chr1", 0, 5022)
    # Reading sequence associated to the region
    fetched = self.genome_file.fetch(region.chrom, region.initial, region.final)
    sequence = str(fetched)

    hits = match_multiple(scanner, [motif], sequence, region)

    self.assertSequenceEqual(
        hits.sequences,
        [
            GenomicRegion("chr1", 4270, 4289, name="MA0139.1.CTCF", orientation="+"),
            GenomicRegion("chr1", 4180, 4199, name="MA0139.1.CTCF", orientation="-"),
        ])
def setUp(self):
    """Create an empty MotifSet and load the hocomoco.mtf file located relative to this module."""
    # we must enforce the use hocomoco as database
    mtf_path = os.path.join(os.path.dirname(__file__), "../data/motifs/hocomoco.mtf")
    self.motif_set = MotifSet(preload_motifs=False)
    self.motif_set.read_mtf(mtf_path)
def test_built_in_functions(self):
    """Check str()/repr() of a MotifSet, of a filtered MotifSet and of one of its annotations."""
    ms = MotifSet(preload_motifs="hocomoco")
    self.assertTrue(str(ms).startswith("MotifSet:{"), msg="str(ms): wrong format")
    self.assertEqual(repr(ms), str(ms), msg="MotifSet: repr does not equal str")

    # The species value has to be the real species name: with an exact search a
    # misspelled name selects nothing and every assertion below would fail.
    ms2 = ms.filter(
        {
            'name': ["ALX1_HUMAN.H11MO.0.B"],
            'species': ["Homo sapiens"]
        },
        search="exact")
    self.assertIn("'name': 'ALX1_HUMAN.H11MO.0.B'", str(ms2), msg="str(ms2): wrong MotifMap")
    self.assertTrue(str(ms2).startswith("MotifSet:{"), msg="str(ms2): wrong format")

    ma = ms2["ALX1_HUMAN.H11MO.0.B"]
    self.assertIn(
        "'thresholds': {0.005: 3.1595, 0.001: 6.52, 0.0005: 7.778, 0.0001: 10.3565, "
        "5e-05: 11.318, 1e-05: 13.4015}",
        str(ma),
        msg="str(ma): threshold missing")
    self.assertIn("'name': 'ALX1_HUMAN.H11MO.0.B'", str(ma), msg="str(ma): wrong Motif")
    self.assertEqual(repr(ma), str(ma), msg="MotifAnnotation: repr does not equal str")
def test_create_default(self):
    """A MotifSet created without arguments holds no motifs and yields an empty motif list."""
    default_set = MotifSet()
    n_motifs = len(default_set)
    self.assertEqual(n_motifs, 0, msg="motif dictionary must be empty by default (no preload)")

    n_listed = len(default_set.get_motif_list(1.0, 0.0001))
    self.assertEqual(n_listed, 0)
def test_built_in_functions(self):
    """Check str()/repr() of a MotifSet, of a filtered MotifSet and of one of its annotations."""
    ms = MotifSet(preload_motifs="hocomoco")
    self.assertTrue(str(ms).startswith("MotifSet:{"), msg="str(ms): wrong format")
    self.assertEqual(repr(ms), str(ms), msg="MotifSet: repr does not equal str")

    # The species value has to be the real species name: with an exact search a
    # misspelled name selects nothing and every assertion below would fail.
    ms2 = ms.filter({'name': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["Homo sapiens"]}, search="exact")
    self.assertIn("'name': 'ALX1_HUMAN.H11MO.0.B'", str(ms2), msg="str(ms2): wrong MotifMap")
    self.assertTrue(str(ms2).startswith("MotifSet:{"), msg="str(ms2): wrong format")

    ma = ms2["ALX1_HUMAN.H11MO.0.B"]
    ma_text = str(ma)
    # The order in which a dict is rendered is not something this test should
    # pin down, so every "fpr: threshold" entry is checked on its own.
    self.assertIn("'thresholds': {", ma_text, msg="str(ma): threshold missing")
    for entry in ("0.005: 3.1595", "0.001: 6.52", "0.0005: 7.778",
                  "0.0001: 10.3565", "5e-05: 11.318", "1e-05: 13.4015"):
        self.assertIn(entry, ma_text, msg="str(ma): threshold missing")
    self.assertIn("'name': 'ALX1_HUMAN.H11MO.0.B'", ma_text, msg="str(ma): wrong Motif")
    self.assertEqual(repr(ma), ma_text, msg="MotifAnnotation: repr does not equal str")
def test_create_default(self):
    """A MotifSet created without arguments has an empty motifs_map."""
    n_motifs = len(MotifSet().motifs_map)
    self.assertEqual(n_motifs, 0, msg="motif dictionary must be empty by default (no preload)")
def test_create_multiple(self):
    """Preloading hocomoco and jaspar_vertebrates together must give 2042 motifs."""
    repositories = ["hocomoco", "jaspar_vertebrates"]
    combined = MotifSet(preload_motifs=repositories)
    failure_note = "motif dictionary must contain sum of motifs in files from jaspar_vertebrates and hocomoco"
    self.assertEqual(len(combined), 2042, msg=failure_note)
def test_create_default(self):
    """Without preload arguments the MotifSet is empty, and so is the motif list built from it."""
    default_set = MotifSet()
    self.assertEqual(
        len(default_set), 0,
        msg="motif dictionary must be empty by default (no preload)")

    listed = default_set.get_motif_list(1.0, 0.0001)
    self.assertEqual(len(listed), 0)
from rgt.MotifSet import MotifSet from rgt.GeneSet import GeneSet import sys jaspar = '/home/ivan/projects/reg-gen/data/motifs/jaspar_vertebrates.mtf' uniprobe = '/home/ivan/projects/reg-gen/data/motifs/uniprobe_primary.mtf' internal = '/home/ivan/projects/reg-gen/data/motifs/internal.mtf' motif_set = MotifSet() motif_set.read_file([jaspar, uniprobe, internal]) motifs = [(l.strip("\n")).split("\t")[0] for l in open(sys.argv[1])] geneset_file = sys.argv[2] search_mode = sys.argv[3] genes = GeneSet("DC Genes") genes.read_expression(geneset_file) filtered = motif_set.filter_by_motifs(motifs) [filtered_genes, g_m, m_g] = filtered.filter_by_genes(genes, search=search_mode) genes_found = [] not_found = [] print "\t\t" + ("\t".join(genes.cond)) for m in motifs: try: sel_genes = m_g[m]
class MotifSetTest(unittest.TestCase): def setUp(self): dirname = os.path.dirname(__file__) mtf_file = os.path.join(dirname, "../data/motifs/hocomoco.mtf") # we must enforce the use hocomoco as database self.motif_set = MotifSet(preload_motifs=False) self.motif_set.read_mtf(mtf_file) def test_create_default(self): ms = MotifSet() self.assertEqual( len(ms.motifs_map), 0, msg="motif dictionary must be empty by default (no preload)") def test_create_empty(self): ms = MotifSet(preload_motifs=False) self.assertEqual(len(ms.motifs_map), 0, msg="motif dictionary must be empty") def test_create_non_empty(self): ms = MotifSet(preload_motifs=True) self.assertGreater(len(ms.motifs_map), 0, msg="motif dictionary must be non empty") def test_filter_keys_not_list(self): with self.assertRaises(ValueError): self.motif_set.filter("test") def test_filter_wrong_key_type(self): with self.assertRaises(ValueError): self.motif_set.filter([], key_type="test") def test_filter_names(self): ms2 = self.motif_set.filter(["ALX1_HUMAN.H11MO.0.B"], key_type="name", search="exact") self.assertEqual(len(ms2.motifs_map), 1) ms2 = self.motif_set.filter(["ALX1"], key_type="name", search="exact") self.assertEqual(len(ms2.motifs_map), 0) ms2 = self.motif_set.filter(["ALX1_HUMAN.H11MO.0.B"], key_type="name", search="inexact") self.assertEqual(len(ms2.motifs_map), 1) ms2 = self.motif_set.filter(["ALX1"], key_type="name", search="inexact") self.assertEqual(len(ms2.motifs_map), 1) ms2 = self.motif_set.filter(["ALX"], key_type="name", search="inexact") self.assertEqual(len(ms2.motifs_map), 3) ms2 = self.motif_set.filter(["ALX1_HUMAN.H11MO.0.B"], key_type="name", search="regex") self.assertEqual(len(ms2.motifs_map), 1) ms2 = self.motif_set.filter(["ALX1.*"], key_type="name", search="regex") self.assertEqual(len(ms2.motifs_map), 1) ms2 = self.motif_set.filter(["ALX[134]_.*"], key_type="name", search="regex") self.assertEqual(len(ms2.motifs_map), 3) def test_filter_genes(self): ms2 = 
self.motif_set.filter(["ALX1_HUMAN.H11MO.0.B"], key_type="gene_names", search="exact") self.assertEqual(len(ms2.motifs_map), 0) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter(["ALX1"], key_type="gene_names", search="exact") self.assertEqual(len(ms2.motifs_map), 1) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 1) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["ALX"], key_type="gene_names", search="exact") self.assertEqual(len(ms2.motifs_map), 0) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter(["ALX1"], key_type="gene_names", search="inexact") self.assertEqual(len(ms2.motifs_map), 1) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 1) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["ALX"], key_type="gene_names", search="inexact") self.assertEqual(len(ms2.motifs_map), 3) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 3) self.assertEqual(len(k2m), 3) ms2 = self.motif_set.filter(["ALX1.*"], key_type="gene_names", search="regex") self.assertEqual(len(ms2.motifs_map), 1) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 1) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["ALX[134]"], key_type="gene_names", search="regex") self.assertEqual(len(ms2.motifs_map), 3) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 3) self.assertEqual(len(k2m), 3) def test_filter_family(self): ms2 = self.motif_set.filter(["Paired-related HD factors"], key_type="family", search="exact") self.assertEqual(len(ms2.motifs_map), 35) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 35) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["factors"], key_type="family", search="exact") 
self.assertEqual(len(ms2.motifs_map), 0) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter(["Paired-related HD factors"], key_type="family", search="inexact") self.assertEqual(len(ms2.motifs_map), 35) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 35) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["Paired-related HD"], key_type="family", search="inexact") self.assertEqual(len(ms2.motifs_map), 35) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 35) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["factors"], key_type="family", search="inexact") self.assertEqual(len(ms2.motifs_map), 676) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 676) self.assertEqual(len(k2m), 59) ms2 = self.motif_set.filter(["Paired.*factors"], key_type="family", search="regex") self.assertEqual(len(ms2.motifs_map), 35) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 35) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["Paired-related.*"], key_type="family", search="regex") self.assertEqual(len(ms2.motifs_map), 35) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 35) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter([".*factors"], key_type="family", search="regex") self.assertEqual(len(ms2.motifs_map), 676) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 676) self.assertEqual(len(k2m), 59) def test_filter_uniprot(self): ms2 = self.motif_set.filter(["Q9H3D4"], key_type="uniprot_ids", search="exact") self.assertEqual(len(ms2.motifs_map), 2) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") self.assertEqual(len(m2k), 2) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["Q9H"], key_type="uniprot_ids", search="exact") self.assertEqual(len(ms2.motifs_map), 0) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") 
self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter(["Q9H3D4"], key_type="uniprot_ids", search="inexact") self.assertEqual(len(ms2.motifs_map), 2) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") self.assertEqual(len(m2k), 2) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["Q9H"], key_type="uniprot_ids", search="inexact") self.assertEqual(len(ms2.motifs_map), 20) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") self.assertEqual(len(m2k), 20) self.assertEqual(len(k2m), 16) ms2 = self.motif_set.filter(["Q9H3D4"], key_type="uniprot_ids", search="regex") self.assertEqual(len(ms2.motifs_map), 2) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") self.assertEqual(len(m2k), 2) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["Q9H.*"], key_type="uniprot_ids", search="regex") self.assertEqual(len(ms2.motifs_map), 20) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") self.assertEqual(len(m2k), 20) self.assertEqual(len(k2m), 16) def test_filter_data_source(self): # implicitly, we are also testing the case insensitiveness of the string matching of all three types ms2 = self.motif_set.filter(["chip-seq"], key_type="data_source", search="exact") self.assertEqual(len(ms2.motifs_map), 433) m2k, k2m = ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 433) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["chip"], key_type="data_source", search="exact") self.assertEqual(len(ms2.motifs_map), 0) m2k, k2m = ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter(["chip-seq"], key_type="data_source", search="inexact") self.assertEqual(len(ms2.motifs_map), 433) m2k, k2m = ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 433) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["chip"], key_type="data_source", search="inexact") self.assertEqual(len(ms2.motifs_map), 433) m2k, k2m = 
ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 433) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["chip-seq"], key_type="data_source", search="regex") self.assertEqual(len(ms2.motifs_map), 433) m2k, k2m = ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 433) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter(["(chip|selex)"], key_type="data_source", search="regex") self.assertEqual(len(ms2.motifs_map), 591) m2k, k2m = ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 591) self.assertEqual(len(k2m), 2)
def test_get_mappings_wrong_key_type(self):
    """get_mappings() must raise ValueError when given the key type "test"."""
    empty_set = MotifSet()
    bad_key_type = "test"
    with self.assertRaises(ValueError):
        empty_set.get_mappings(bad_key_type)
# search mode to map factors to motifs (exact or inexact) search_mode = sys.argv[3] # pvalue cutoff for definition of active factors pvalue = float(sys.argv[4]) # output file out = sys.argv[5] # genes to be used as potential targets targets = None if len(sys.argv) > 6: targets_file = sys.argv[6] # reading targets targets = GeneSet("genes") targets.read(targets_file) # starting motif databases if len(sys.argv) > 7: motif_set = MotifSet(preload_motifs=False) motif_set.read_mtf([sys.argv[7]]) else: motif_set = MotifSet(preload_motifs=True) # reading genes factors = GeneSet("genes") factors.read(factor_file) # we only want a subset of the motif set motif_set = motif_set.filter(factors.genes, key_type="gene_names", search=search_mode) motif_set.read_enrichment(enrichment_files, pvalue) motif_set.write_network(targets, out, pvalue)
def test_create_empty(self):
    """With preload_motifs=None the MotifSet is empty, and so is the motif list built from it."""
    empty_set = MotifSet(preload_motifs=None)
    n_motifs = len(empty_set)
    self.assertEqual(n_motifs, 0, msg="motif dictionary must be empty")

    n_listed = len(empty_set.get_motif_list(1.0, 0.0001))
    self.assertEqual(n_listed, 0)
def test_create_non_empty(self):
    """Preloading "hocomoco" must give a MotifSet with at least one motif."""
    repository = "hocomoco"
    n_motifs = len(MotifSet(preload_motifs=repository))
    self.assertGreater(n_motifs, 0, msg="motif dictionary must be non empty")
from rgt.MotifSet import MotifSet from rgt.GeneSet import GeneSet import sys jaspar='/home/ivan/projects/reg-gen/data/motifs/jaspar_vertebrates.mtf' uniprobe='/home/ivan/projects/reg-gen/data/motifs/uniprobe_primary.mtf' internal='/home/ivan/projects/reg-gen/data/motifs/internal.mtf' motif_set = MotifSet() motif_set.read_file([jaspar,uniprobe,internal]) motifs=[(l.strip("\n")).split("\t")[0] for l in open(sys.argv[1])] geneset_file=sys.argv[2] search_mode=sys.argv[3] genes=GeneSet("DC Genes") genes.read_expression(geneset_file) filtered=motif_set.filter_by_motifs(motifs) [filtered_genes,g_m,m_g]=filtered.filter_by_genes(genes,search=search_mode) genes_found=[] not_found=[] print "\t\t"+("\t".join(genes.cond)) for m in motifs: try: sel_genes=m_g[m] for g in sel_genes:
# pvalue cuttoff for definition of active factors pvalue=float(sys.argv[4]) # output file out=sys.argv[5] # genes to be used as potential targets filter_targets=[] targets=None if len(sys.argv) > 6: targets_file=sys.argv[6] # reading targets targets=GeneSet("genes") targets.read(targets_file) # starting motif databases motif_set = MotifSet() if len(sys.argv) > 7: motif_set.read_file([sys.argv[7]]) else: motif_set.read_file([jaspar,uniprobe,internal]) # reading genes factors=GeneSet("genes") factors.read(factor_file) # reading networks #for f in glob.glob(enrichment_files): # # use last dir name as name for condition # condition=os.path.dirname(f) # condition=condition.split("/")[-1] motif_set.read_motif_targets_enrichment(enrichment_files,pvalue)
def test_create_empty(self):
    """With preload_motifs=False the MotifSet has an empty motifs_map."""
    n_motifs = len(MotifSet(preload_motifs=False).motifs_map)
    self.assertEqual(n_motifs, 0, msg="motif dictionary must be empty")
search_mode = sys.argv[3] # pvalue cutoff for definition of active factors pvalue = float(sys.argv[4]) # output file out = sys.argv[5] # genes to be used as potential targets targets = None if len(sys.argv) > 6: targets_file = sys.argv[6] # reading targets targets = GeneSet("genes") targets.read(targets_file) # starting motif databases if len(sys.argv) > 7: motif_set = MotifSet(preload_motifs=False) motif_set.read_mtf([sys.argv[7]]) else: motif_set = MotifSet(preload_motifs=True) # reading genes factors = GeneSet("genes") factors.read(factor_file) # we only want a subset of the motif set motif_set = motif_set.filter(factors.genes, key_type="gene_names", search=search_mode) motif_set.read_enrichment(enrichment_files, pvalue) motif_set.write_network(targets, out, pvalue)
def setUp(self):
    """Give every test a MotifSet preloaded from the "hocomoco" repository."""
    # we must enforce the use hocomoco as database
    repository = "hocomoco"
    self.motif_set = MotifSet(preload_motifs=repository)
class MotifSetTest(unittest.TestCase): def setUp(self): # we must enforce the use hocomoco as database self.motif_set = MotifSet(preload_motifs="hocomoco") def test_built_in_functions(self): ms = MotifSet(preload_motifs="hocomoco") self.assertTrue(str(ms).startswith("MotifSet:{"), msg="str(ms): wrong format") self.assertTrue(repr(ms) == str(ms), msg="MotifSet: repr does not equal str") ms2 = ms.filter({'name': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["H**o sapiens"]}, search="exact") self.assertTrue("'name': 'ALX1_HUMAN.H11MO.0.B'" in str(ms2), msg="str(ms2): wrong MotifMap") self.assertTrue(str(ms2).startswith("MotifSet:{"), msg="str(ms2): wrong format") ma = ms2.__getitem__("ALX1_HUMAN.H11MO.0.B") self.assertTrue("'thresholds': {1e-05: 13.4015, 0.0001: 10.3565, 0.001: 6.52, 0.0005: 7.778, 5e-05: 11.318," " 0.005: 3.1595}" in str(ma), msg="str(ma): threshold missing") self.assertTrue("'name': 'ALX1_HUMAN.H11MO.0.B'" in str(ma), msg="str(ma): wrong Motif") self.assertTrue(repr(ma) == str(ma), msg="MotifAnnotation: repr does not equal str") def test_create_default(self): ms = MotifSet() self.assertEqual(len(ms), 0, msg="motif dictionary must be empty by default (no preload)") motif_list = ms.get_motif_list(1.0, 0.0001) self.assertEqual(len(motif_list), 0) def test_create_multiple(self): ms = MotifSet(preload_motifs=["hocomoco", "jaspar_vertebrates"]) self.assertEqual(len(ms), 1875, msg="motif dictionary must contain sum of motifs in files from jaspar" "_vertebrates and hocomoco") def test_create_empty(self): ms = MotifSet(preload_motifs=None) self.assertEqual(len(ms), 0, msg="motif dictionary must be empty") motif_list = ms.get_motif_list(1.0, 0.0001) self.assertEqual(len(motif_list), 0) def test_create_non_empty(self): ms = MotifSet(preload_motifs="hocomoco") self.assertGreater(len(ms), 0, msg="motif dictionary must be non empty") def test_filter_values_not_dict(self): with self.assertRaises(ValueError): self.motif_set.filter("test") def 
test_filter_wrong_key_type(self): with self.assertRaises(ValueError): self.motif_set.filter({'test': []}) def test_get_mappings_wrong_key_type(self): ms = MotifSet() with self.assertRaises(ValueError): ms.get_mappings("test") def test_filter_names(self): ms2 = self.motif_set.filter({'name': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["H**o sapiens"]}, search="exact") self.assertEqual(len(ms2), 1) motif_list = ms2.get_motif_list(1.0, 0.0001) self.assertEqual(len(motif_list), 1) ms2 = self.motif_set.filter({'name': ["ALX1"], 'species': ["H**o sapiens"]}, search="exact") self.assertEqual(len(ms2), 0) motif_list = ms2.get_motif_list(1.0, 0.0001) self.assertEqual(len(motif_list), 0) ms2 = self.motif_set.filter({'name': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 1) ms2 = self.motif_set.filter({'name': ["ALX1"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 1) ms2 = self.motif_set.filter({'name': ["ALX"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 3) ms2 = self.motif_set.filter({'name': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["H**o sapiens"]}, search="regex") self.assertEqual(len(ms2), 1) ms2 = self.motif_set.filter({'name': ["ALX1.*"], 'species': ["H**o sapiens"]}, search="regex") self.assertEqual(len(ms2), 1) ms2 = self.motif_set.filter({'name': ["ALX[134]_.*"], 'species': ["H**o sapiens"]}, search="regex") self.assertEqual(len(ms2), 3) def test_filter_genes(self): ms2 = self.motif_set.filter({'gene_names': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["H**o sapiens"]}, search="exact") self.assertEqual(len(ms2), 0) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter({'gene_names': ["ALX1"], 'species': ["H**o sapiens"]}, search="exact") self.assertEqual(len(ms2), 1) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 1) self.assertEqual(len(k2m), 1) ms2 = 
self.motif_set.filter({'gene_names': ["ALX"], 'species': ["H**o sapiens"]}, search="exact") self.assertEqual(len(ms2), 0) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter({'gene_names': ["ALX1"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 1) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 1) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'gene_names': ["ALX"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 3) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 3) self.assertEqual(len(k2m), 3) ms2 = self.motif_set.filter({'gene_names': ["ALX1.*"], 'species': ["H**o sapiens"]}, search="regex") self.assertEqual(len(ms2), 1) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 1) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'gene_names': ["ALX[134]"], 'species': ["H**o sapiens"]}, search="regex") self.assertEqual(len(ms2), 3) m2k, k2m = ms2.get_mappings(key_type="gene_names") self.assertEqual(len(m2k), 3) self.assertEqual(len(k2m), 3) def test_filter_family(self): ms2 = self.motif_set.filter({'family': ["Paired-related HD factors"], 'species': ["H**o sapiens"]}, search="exact") self.assertEqual(len(ms2), 35) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 35) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'family': ["factors"], 'species': ["H**o sapiens"]}, search="exact") self.assertEqual(len(ms2), 0) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter({'family': ["Paired-related HD factors"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 35) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 35) self.assertEqual(len(k2m), 1) ms2 = 
self.motif_set.filter({'family': ["Paired-related HD"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 35) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 35) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'family': ["factors"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 673) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 673) self.assertEqual(len(k2m), 59) ms2 = self.motif_set.filter({'family': ["Paired.*factors"], 'species': ["H**o sapiens"]}, search="regex") self.assertEqual(len(ms2), 35) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 35) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'family': ["Paired-related.*"], 'species': ["H**o sapiens"]}, search="regex") self.assertEqual(len(ms2), 35) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 35) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'family': [".*factors"], 'species': ["H**o sapiens"]}, search="regex") self.assertEqual(len(ms2), 673) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 673) self.assertEqual(len(k2m), 59) def test_filter_uniprot(self): ms2 = self.motif_set.filter({'uniprot_ids': ["Q9H3D4"], 'species': ["H**o sapiens"]}, search="exact") self.assertEqual(len(ms2), 2) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") self.assertEqual(len(m2k), 2) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'uniprot_ids': ["Q9H"], 'species': ["H**o sapiens"]}, search="exact") self.assertEqual(len(ms2), 0) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter({'uniprot_ids': ["Q9H3D4"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 2) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") self.assertEqual(len(m2k), 2) self.assertEqual(len(k2m), 1) ms2 = 
self.motif_set.filter({'uniprot_ids': ["Q9H"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 20) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") self.assertEqual(len(m2k), 20) self.assertEqual(len(k2m), 16) ms2 = self.motif_set.filter({'uniprot_ids': ["Q9H3D4"], 'species': ["H**o sapiens"]}, search="regex") self.assertEqual(len(ms2), 2) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") self.assertEqual(len(m2k), 2) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'uniprot_ids': ["Q9H.*"], 'species': ["H**o sapiens"]}, search="regex") self.assertEqual(len(ms2), 20) m2k, k2m = ms2.get_mappings(key_type="uniprot_ids") self.assertEqual(len(m2k), 20) self.assertEqual(len(k2m), 16) def test_filter_data_source(self): # implicitly, we are also testing the case insensitiveness of the string matching of all three types ms2 = self.motif_set.filter({'data_source': ["chip-seq"], 'species': ["H**o sapiens"]}, search="exact") self.assertEqual(len(ms2), 431) m2k, k2m = ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 431) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'data_source': ["chip"], 'species': ["H**o sapiens", "Mus musculus"]}, search="exact") self.assertEqual(len(ms2), 0) m2k, k2m = ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter({'data_source': ["chip-seq"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 431) m2k, k2m = ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 431) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'data_source': ["chip"], 'species': ["H**o sapiens"]}, search="inexact") self.assertEqual(len(ms2), 431) m2k, k2m = ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 431) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'data_source': ["chip-seq"], 'species': ["H**o sapiens"]}, search="regex") 
self.assertEqual(len(ms2), 431) m2k, k2m = ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 431) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'data_source': ["(chip|selex)"], 'species': ["H**o sapiens"]}, search="regex") self.assertEqual(len(ms2), 588) m2k, k2m = ms2.get_mappings(key_type="data_source") self.assertEqual(len(m2k), 588) self.assertEqual(len(k2m), 2) def test_filter_database(self): ms2 = self.motif_set.filter({'database': ["hocomoco"]}, search="exact") self.assertEqual(len(ms2), 1296) m2k, k2m = ms2.get_mappings(key_type="database") self.assertEqual(len(m2k), 1296) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'database': ["jaspar_vertebrates"]}, search="exact") self.assertEqual(len(ms2), 0) m2k, k2m = ms2.get_mappings(key_type="database") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter({'database': ["jaspar_vertebrates"]}, search="inexact") self.assertEqual(len(ms2), 0) m2k, k2m = ms2.get_mappings(key_type="database") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter({'database': ["uniprobe"]}, search="inexact") self.assertEqual(len(ms2), 0) m2k, k2m = ms2.get_mappings(key_type="database") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter({'database': ["jaspar"]}, search="regex") self.assertEqual(len(ms2), 0) m2k, k2m = ms2.get_mappings(key_type="database") self.assertEqual(len(m2k), 0) self.assertEqual(len(k2m), 0) ms2 = self.motif_set.filter({'database': ["(hocomoco|jaspar)"]}, search="regex") self.assertEqual(len(ms2), 1296) m2k, k2m = ms2.get_mappings(key_type="database") self.assertEqual(len(m2k), 1296) self.assertEqual(len(k2m), 1) def test_filter(self): #test different combinations of key_types and keys ms2 = self.motif_set.filter({'data_source': ["chip-seq", "integrative"]}, search="exact") self.assertEqual(len(ms2), 1138) m2k, k2m = ms2.get_mappings(key_type="data_source") 
self.assertEqual(len(m2k), 1138) self.assertEqual(len(k2m), 2) ms2 = self.motif_set.filter( {'data_source': ["chip-seq", "integrative"], 'family': ["Steroid hormone receptors (NR3)"], 'species': ["Mus musculus"]}, search="exact") self.assertEqual(len(ms2), 14) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 14) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'data_source': ["chip-seq"], 'family': ["Steroid hormone receptors (NR3)"], 'tax_group': ["vertebrates", "plants"]}, search="exact") self.assertEqual(len(ms2), 25) m2k, k2m = ms2.get_mappings(key_type="tax_group") self.assertEqual(len(m2k), 25) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'data_source': ["chip-seq"], 'family': ["Steroid hormone receptors (NR3)"], 'species': ["Mus musculus", "H**o sapiens"]}, search="exact") self.assertEqual(len(ms2), 25) m2k, k2m = ms2.get_mappings(key_type="species") self.assertEqual(len(m2k), 25) self.assertEqual(len(k2m), 2) ms2 = self.motif_set.filter({'data_source': ["chip"], 'family': ["NR3"], 'tax_group': ["brates"]}, search="inexact") self.assertEqual(len(ms2), 25) m2k, k2m = ms2.get_mappings(key_type="tax_group") self.assertEqual(len(m2k), 25) self.assertEqual(len(k2m), 1) ms2 = self.motif_set.filter({'family': [".*related factors"]}, search="regex") self.assertEqual(len(ms2), 587) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 587) self.assertEqual(len(k2m), 36) motif_list = ms2.get_motif_list(1.0, 0.0001) self.assertEqual(len(motif_list), 587) ms2 = self.motif_set.filter({'data_source': ["(chip|integr)"], 'family': ["multiple"], 'species': ["(musculus|sapiens)"]}, search="regex") self.assertEqual(len(ms2), 57) m2k, k2m = ms2.get_mappings(key_type="family") self.assertEqual(len(m2k), 57) self.assertEqual(len(k2m), 1) def test_filter_with_empty_dict(self): ms2 = self.motif_set.filter({}, search="exact") self.assertEqual(len(ms2), 1296) def test_create_motif_list(self): ms2 = 
self.motif_set.filter({'name': ["PITX"]}, search="inexact") # 5 Motifs threshold = ms2["PITX2_HUMAN.H11MO.0.D"].thresholds[0.0001] # we remove the pre-calculated thresholds so we can test whether the calculation works for ma in iter(ms2): for fpr in [0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001]: ma.thresholds[fpr] = [] # is the new threshold equal to the mtf one? ml = ms2.get_motif_list(1.0, 0.0001) self.assertEqual(len(ml), len(ms2)) self.assertEqual(ml[2].threshold, threshold, msg="create_motif_list calculates threshold incorrectly") # is the threshold calculated for non-standard fpr? for ma in iter(ms2): ma.thresholds = {} ml = ms2.get_motif_list(1.0, 0.0001) self.assertEqual(ml[2].threshold, threshold, msg="create_motif_list doesn't work for empty thresholds") self.assertEqual(len(ml), len(ms2))
import sys from rgt.GeneSet import GeneSet from rgt.MotifSet import MotifSet motifs = [(l.strip("\n")).split("\t")[0] for l in open(sys.argv[1])] geneset_file = sys.argv[2] search_mode = sys.argv[3] # preload all available motifs from the repositories motif_set = MotifSet(preload_motifs=True) genes = GeneSet("DC Genes") genes.read_expression(geneset_file) # take only a subset of the motifs (using their exact names) motif_set, _, _ = motif_set.filter(motifs, key_type="name") # of these new motif set, take the subset of those matching these gene names # (we only care about the motif2gene mapping) _, m_g, _ = motif_set.filter(genes.genes, key_type="gene_names", search=search_mode) genes_found = [] not_found = [] print("\t\t" + ("\t".join(genes.cond))) for m in motifs: try: sel_genes = m_g[m] for g in sel_genes:
def test_create_non_empty(self):
    # a MotifSet built with preload_motifs=True must expose at least one motif
    preloaded = MotifSet(preload_motifs=True)
    motif_count = len(preloaded.motifs_map)
    self.assertGreater(motif_count, 0, msg="motif dictionary must be non empty")
class CustomDBTest(unittest.TestCase):
    """Tests for a MotifSet preloaded from the "TestCustomDB" path next to this file.

    Equality and membership checks use ``assertEqual`` / ``assertIn`` so that a
    failing run reports the compared values instead of "False is not true".
    """

    def setUp(self):
        # use CustomDB
        custom_db = os.path.join(os.path.dirname(__file__), "TestCustomDB")
        self.motif_set = MotifSet(preload_motifs=[custom_db], motif_dbs=True)

    def test_loading(self):
        motifs_map = self.motif_set.motifs_map
        self.assertEqual(len(motifs_map), 3, msg="loaded wrong number of motifs")
        self.assertIsNone(motifs_map["firstMotif_5.0.B"].gene_names, msg="gene_names not None")
        self.assertIsNone(motifs_map["secondMotif_5.0.B"].data_source, msg="data_source not None")
        self.assertEqual(len(motifs_map["thirdMotif_5.0.B"].thresholds), 0,
                         msg="thresholds is not an empty dict")

    def test_built_in_functions(self):
        # __str__ / __repr__ of the whole set
        self.assertTrue(str(self.motif_set).startswith("MotifSet:{"), msg="str(ms): wrong format")
        self.assertEqual(repr(self.motif_set), str(self.motif_set),
                         msg="MotifSet: repr does not equal str")

        # __str__ of a filtered set
        ms2 = self.motif_set.filter({'name': ['firstMotif_5.0.B']}, search="exact")
        self.assertIn("'name': 'firstMotif_5.0.B'", str(ms2), msg="str(ms2): wrong MotifMap")
        self.assertTrue(str(ms2).startswith("MotifSet:{"), msg="str(ms2): wrong format")

        # __getitem__ (through subscription), then __str__ / __repr__ of the returned item
        ma = ms2["firstMotif_5.0.B"]
        self.assertIn("'name': 'firstMotif_5.0.B'", str(ma), msg="str(ma): wrong Motif")
        self.assertEqual(repr(ma), str(ma), msg="MotifAnnotation: repr does not equal str")

    def test_filter_values_not_dict(self):
        with self.assertRaises(ValueError):
            self.motif_set.filter("test")

    def test_filter_wrong_key_type(self):
        with self.assertRaises(ValueError):
            self.motif_set.filter({'test': []})

    def test_get_mappings_wrong_key_type(self):
        ms = MotifSet()
        with self.assertRaises(ValueError):
            ms.get_mappings("test")

    def test_filter_names(self):
        # every (name, search mode) pair must select exactly one motif
        cases = (
            ("firstMotif_5.0.B", "exact"),
            ("secondMotif_5.0.B", "inexact"),
            ("thirdMotif_5.0.B", "regex"),
        )
        for name, mode in cases:
            context = "name=%r, search=%r" % (name, mode)
            ms2 = self.motif_set.filter({'name': [name]}, search=mode)
            self.assertEqual(len(ms2), 1, msg=context)
            motif_list = ms2.get_motif_list(1.0, 0.0001)
            self.assertEqual(len(motif_list), 1, msg=context)

    def test_filter_database(self):
        # a database name that was not loaded must match nothing
        ms2 = self.motif_set.filter({'database': ["hocomoco"]}, search="exact")
        self.assertEqual(len(ms2), 0)

        # the custom database must match all three motifs in every search mode
        for mode in ("exact", "inexact", "regex"):
            ms2 = self.motif_set.filter({'database': ["TestCustomDB"]}, search=mode)
            self.assertEqual(len(ms2), 3, msg="search=%r" % mode)

    def test_filter_with_empty_dict(self):
        ms2 = self.motif_set.filter({}, search="exact")
        self.assertEqual(len(ms2), 3)
search_mode = sys.argv[3] # pvalue cuttoff for definition of active factors pvalue = float(sys.argv[4]) # output file out = sys.argv[5] # genes to be used as potential targets filter_targets = [] targets = None if len(sys.argv) > 6: targets_file = sys.argv[6] # reading targets targets = GeneSet("genes") targets.read(targets_file) # starting motif databases motif_set = MotifSet() if len(sys.argv) > 7: motif_set.read_file([sys.argv[7]]) else: motif_set.read_file([jaspar, uniprobe, internal]) # reading genes factors = GeneSet("genes") factors.read(factor_file) # reading networks #for f in glob.glob(enrichment_files): # # use last dir name as name for condition # condition=os.path.dirname(f) # condition=condition.split("/")[-1] motif_set.read_motif_targets_enrichment(enrichment_files, pvalue)
def setUp(self):
    # use CustomDB: the "TestCustomDB" entry that sits next to this file
    here = os.path.dirname(__file__)
    custom_db = os.path.join(here, "TestCustomDB")
    self.motif_set = MotifSet(preload_motifs=[custom_db], motif_dbs=True)