Example #1
0
    def test_match_multiple(self):
        """Scan chr1:0-5022 with the MA0139.1.CTCF motif and check the two reported hits."""
        # the database/name filter is expected to leave exactly one motif
        ctcf_set = MotifSet(preload_motifs="default").filter(
            {'database': ["jaspar_vertebrates"], 'name': ["MA0139.1.CTCF"]},
            search="inexact")
        self.assertEqual(len(ctcf_set), 1)

        motif = ctcf_set.get_motif_list(1, 0.0001)[0]

        scanner = scan.Scanner(7)

        # two matrices (pssm and pssm_rc) are scanned, both with the motif's own threshold
        thresholds = [motif.threshold, motif.threshold]
        pssm_list = [motif.pssm, motif.pssm_rc]

        scanner.set_motifs(pssm_list, tools.flat_bg(4), thresholds)

        genomic_region = GenomicRegion("chr1", 0, 5022)

        # sequence covered by genomic_region, fetched from the test's genome file
        sequence = str(self.genome_file.fetch(genomic_region.chrom,
                                              genomic_region.initial,
                                              genomic_region.final))

        hits = match_multiple(scanner, [motif], sequence, genomic_region)

        expected_hits = [
            GenomicRegion("chr1", 4270, 4289, name="MA0139.1.CTCF", orientation="+"),
            GenomicRegion("chr1", 4180, 4199, name="MA0139.1.CTCF", orientation="-"),
        ]
        self.assertSequenceEqual(hits.sequences, expected_hits)
Example #2
0
    def setUp(self):
        """Create the fixture MotifSet from the hocomoco.mtf file located relative to this test file."""
        here = os.path.dirname(__file__)

        # preloading is disabled so that hocomoco is the only database in use
        fixture = MotifSet(preload_motifs=False)
        fixture.read_mtf(os.path.join(here, "../data/motifs/hocomoco.mtf"))
        self.motif_set = fixture
Example #3
0
 def test_built_in_functions(self):
     """Check the str()/repr() output of a MotifSet and of one motif taken from it."""
     ms = MotifSet(preload_motifs="hocomoco")
     self.assertTrue(str(ms).startswith("MotifSet:{"),
                     msg="str(ms): wrong format")
     self.assertTrue(repr(ms) == str(ms),
                     msg="MotifSet: repr does not equal str")
     # restrict the set to the single human ALX1 motif
     ms2 = ms.filter(
         {
             'name': ["ALX1_HUMAN.H11MO.0.B"],
             'species': ["Homo sapiens"]
         },
         search="exact")
     self.assertTrue("'name': 'ALX1_HUMAN.H11MO.0.B'" in str(ms2),
                     msg="str(ms2): wrong MotifMap")
     self.assertTrue(str(ms2).startswith("MotifSet:{"),
                     msg="str(ms2): wrong format")
     # look the motif up by name; its string form must list thresholds and name
     ma = ms2["ALX1_HUMAN.H11MO.0.B"]
     self.assertTrue(
         "'thresholds': {0.005: 3.1595, 0.001: 6.52, 0.0005: 7.778, 0.0001: 10.3565, "
         "5e-05: 11.318, 1e-05: 13.4015}" in str(ma),
         msg="str(ma): threshold missing")
     self.assertTrue("'name': 'ALX1_HUMAN.H11MO.0.B'" in str(ma),
                     msg="str(ma): wrong Motif")
     self.assertTrue(repr(ma) == str(ma),
                     msg="MotifAnnotation: repr does not equal str")
Example #4
0
 def test_create_default(self):
     """A MotifSet created without arguments has length 0 and an empty motif list."""
     empty_set = MotifSet()
     self.assertEqual(len(empty_set), 0,
                      msg="motif dictionary must be empty by default (no preload)")
     self.assertEqual(len(empty_set.get_motif_list(1.0, 0.0001)), 0)
Example #5
0
 def test_built_in_functions(self):
     """Check the str()/repr() output of a MotifSet and of one motif taken from it."""
     ms = MotifSet(preload_motifs="hocomoco")
     self.assertTrue(str(ms).startswith("MotifSet:{"), msg="str(ms): wrong format")
     self.assertTrue(repr(ms) == str(ms), msg="MotifSet: repr does not equal str")
     # restrict the set to the single human ALX1 motif
     ms2 = ms.filter({'name': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["Homo sapiens"]}, search="exact")
     self.assertTrue("'name': 'ALX1_HUMAN.H11MO.0.B'" in str(ms2), msg="str(ms2): wrong MotifMap")
     self.assertTrue(str(ms2).startswith("MotifSet:{"), msg="str(ms2): wrong format")
     # look the motif up by name; its string form must list thresholds and name
     ma = ms2["ALX1_HUMAN.H11MO.0.B"]
     self.assertTrue("'thresholds': {1e-05: 13.4015, 0.0001: 10.3565, 0.001: 6.52, 0.0005: 7.778, 5e-05: 11.318,"
                     " 0.005: 3.1595}" in str(ma), msg="str(ma): threshold missing")
     self.assertTrue("'name': 'ALX1_HUMAN.H11MO.0.B'" in str(ma), msg="str(ma): wrong Motif")
     self.assertTrue(repr(ma) == str(ma), msg="MotifAnnotation: repr does not equal str")
Example #6
0
    def test_create_default(self):
        """A MotifSet created without arguments must have an empty motifs_map."""
        default_set = MotifSet()
        n_motifs = len(default_set.motifs_map)
        self.assertEqual(n_motifs, 0,
                         msg="motif dictionary must be empty by default (no preload)")
Example #7
0
 def test_create_multiple(self):
     """Preloading hocomoco and jaspar_vertebrates together must give 2042 motifs."""
     repositories = ["hocomoco", "jaspar_vertebrates"]
     combined = MotifSet(preload_motifs=repositories)
     failure_note = ("motif dictionary must contain sum of motifs in files from jaspar"
                     "_vertebrates and hocomoco")
     self.assertEqual(len(combined), 2042, msg=failure_note)
Example #8
0
 def test_create_default(self):
     """Without preloading, both len() and get_motif_list() must report no motifs."""
     default_set = MotifSet()
     n_motifs = len(default_set)
     self.assertEqual(n_motifs, 0, msg="motif dictionary must be empty by default (no preload)")
     listed = default_set.get_motif_list(1.0, 0.0001)
     self.assertEqual(len(listed), 0)
Example #9
0
from rgt.MotifSet import MotifSet
from rgt.GeneSet import GeneSet
import sys

# Motif repository files (hard-coded absolute paths).
jaspar = '/home/ivan/projects/reg-gen/data/motifs/jaspar_vertebrates.mtf'
uniprobe = '/home/ivan/projects/reg-gen/data/motifs/uniprobe_primary.mtf'
internal = '/home/ivan/projects/reg-gen/data/motifs/internal.mtf'

motif_set = MotifSet()
motif_set.read_file([jaspar, uniprobe, internal])

# argv[1]: tab-separated file; the first column of every line is a motif name.
# The file is opened in a context manager so the handle is closed after reading.
with open(sys.argv[1]) as motif_file:
    motifs = [(l.strip("\n")).split("\t")[0] for l in motif_file]

# argv[2]: expression file loaded into the gene set
geneset_file = sys.argv[2]

# argv[3]: search mode forwarded to filter_by_genes
search_mode = sys.argv[3]

genes = GeneSet("DC Genes")
genes.read_expression(geneset_file)

filtered = motif_set.filter_by_motifs(motifs)

[filtered_genes, g_m, m_g] = filtered.filter_by_genes(genes,
                                                      search=search_mode)

genes_found = []
not_found = []
# header row; a single parenthesised argument prints the same on Python 2 and 3
print("\t\t" + ("\t".join(genes.cond)))
for m in motifs:
    try:
        sel_genes = m_g[m]
Example #10
0
class MotifSetTest(unittest.TestCase):
    """Tests for MotifSet creation and for filtering by a list of keys of a given key type."""

    def setUp(self):
        """Create the fixture MotifSet from the hocomoco.mtf file located relative to this test file."""
        here = os.path.dirname(__file__)

        # preloading is disabled so that hocomoco is the only database in use
        fixture = MotifSet(preload_motifs=False)
        fixture.read_mtf(os.path.join(here, "../data/motifs/hocomoco.mtf"))
        self.motif_set = fixture

    def _check_filter(self, key_type, key, search, n_motifs, n_keys=None):
        """Filter the fixture on a single key and verify the sizes of the result.

        key_type -- key type passed to both filter() and get_mappings()
        key      -- the single key to filter on (wrapped in a list for the call)
        search   -- search mode passed to filter()
        n_motifs -- expected size of the filtered motifs_map and of the first
                    mapping returned by get_mappings()
        n_keys   -- expected size of the second mapping returned by
                    get_mappings(); None skips the get_mappings() checks
        """
        ms2 = self.motif_set.filter([key], key_type=key_type, search=search)
        self.assertEqual(len(ms2.motifs_map), n_motifs)
        if n_keys is None:
            return
        m2k, k2m = ms2.get_mappings(key_type=key_type)
        self.assertEqual(len(m2k), n_motifs)
        self.assertEqual(len(k2m), n_keys)

    def test_create_default(self):
        """A MotifSet created without arguments must have an empty motifs_map."""
        default_set = MotifSet()
        self.assertEqual(len(default_set.motifs_map), 0,
                         msg="motif dictionary must be empty by default (no preload)")

    def test_create_empty(self):
        """Disabling preloading explicitly must give an empty motifs_map."""
        empty_set = MotifSet(preload_motifs=False)
        self.assertEqual(len(empty_set.motifs_map), 0,
                         msg="motif dictionary must be empty")

    def test_create_non_empty(self):
        """Enabling preloading must give a non-empty motifs_map."""
        loaded_set = MotifSet(preload_motifs=True)
        self.assertGreater(len(loaded_set.motifs_map), 0,
                           msg="motif dictionary must be non empty")

    def test_filter_keys_not_list(self):
        """filter() must raise ValueError when the keys are a string, not a list."""
        self.assertRaises(ValueError, self.motif_set.filter, "test")

    def test_filter_wrong_key_type(self):
        """filter() must raise ValueError for an unknown key type."""
        self.assertRaises(ValueError, self.motif_set.filter, [], key_type="test")

    def test_filter_names(self):
        """Filter by motif name with the exact, inexact and regex search modes."""
        self._check_filter("name", "ALX1_HUMAN.H11MO.0.B", "exact", 1)
        self._check_filter("name", "ALX1", "exact", 0)

        self._check_filter("name", "ALX1_HUMAN.H11MO.0.B", "inexact", 1)
        self._check_filter("name", "ALX1", "inexact", 1)
        self._check_filter("name", "ALX", "inexact", 3)

        self._check_filter("name", "ALX1_HUMAN.H11MO.0.B", "regex", 1)
        self._check_filter("name", "ALX1.*", "regex", 1)
        self._check_filter("name", "ALX[134]_.*", "regex", 3)

    def test_filter_genes(self):
        """Filter by gene name and verify the sizes of both mappings."""
        self._check_filter("gene_names", "ALX1_HUMAN.H11MO.0.B", "exact", 0, 0)
        self._check_filter("gene_names", "ALX1", "exact", 1, 1)
        self._check_filter("gene_names", "ALX", "exact", 0, 0)

        self._check_filter("gene_names", "ALX1", "inexact", 1, 1)
        self._check_filter("gene_names", "ALX", "inexact", 3, 3)

        self._check_filter("gene_names", "ALX1.*", "regex", 1, 1)
        self._check_filter("gene_names", "ALX[134]", "regex", 3, 3)

    def test_filter_family(self):
        """Filter by family and verify the sizes of both mappings."""
        self._check_filter("family", "Paired-related HD factors", "exact", 35, 1)
        self._check_filter("family", "factors", "exact", 0, 0)

        self._check_filter("family", "Paired-related HD factors", "inexact", 35, 1)
        self._check_filter("family", "Paired-related HD", "inexact", 35, 1)
        self._check_filter("family", "factors", "inexact", 676, 59)

        self._check_filter("family", "Paired.*factors", "regex", 35, 1)
        self._check_filter("family", "Paired-related.*", "regex", 35, 1)
        self._check_filter("family", ".*factors", "regex", 676, 59)

    def test_filter_uniprot(self):
        """Filter by UniProt id and verify the sizes of both mappings."""
        self._check_filter("uniprot_ids", "Q9H3D4", "exact", 2, 1)
        self._check_filter("uniprot_ids", "Q9H", "exact", 0, 0)

        self._check_filter("uniprot_ids", "Q9H3D4", "inexact", 2, 1)
        self._check_filter("uniprot_ids", "Q9H", "inexact", 20, 16)

        self._check_filter("uniprot_ids", "Q9H3D4", "regex", 2, 1)
        self._check_filter("uniprot_ids", "Q9H.*", "regex", 20, 16)

    def test_filter_data_source(self):
        """Filter by data source and verify the sizes of both mappings."""
        # implicitly, we are also testing the case insensitiveness of the string matching of all three types
        self._check_filter("data_source", "chip-seq", "exact", 433, 1)
        self._check_filter("data_source", "chip", "exact", 0, 0)

        self._check_filter("data_source", "chip-seq", "inexact", 433, 1)
        self._check_filter("data_source", "chip", "inexact", 433, 1)

        self._check_filter("data_source", "chip-seq", "regex", 433, 1)
        self._check_filter("data_source", "(chip|selex)", "regex", 591, 2)
Example #11
0
 def test_get_mappings_wrong_key_type(self):
     """get_mappings() must raise ValueError for an unknown key type."""
     empty_set = MotifSet()
     self.assertRaises(ValueError, empty_set.get_mappings, "test")
Example #12
0
# argv[3]: search mode used to map factors to motifs (exact or inexact)
search_mode = sys.argv[3]
# argv[4]: p-value cutoff for the definition of active factors
pvalue = float(sys.argv[4])
# argv[5]: output file
out = sys.argv[5]

# argv[6] (optional): genes to be used as potential targets
if len(sys.argv) <= 6:
    targets = None
else:
    targets_file = sys.argv[6]
    targets = GeneSet("genes")
    targets.read(targets_file)

# motif databases: a user-supplied file (argv[7]) replaces the preloaded ones
custom_database = len(sys.argv) > 7
motif_set = MotifSet(preload_motifs=not custom_database)
if custom_database:
    motif_set.read_mtf([sys.argv[7]])

# genes acting as factors
factors = GeneSet("genes")
factors.read(factor_file)

# keep only the motifs whose gene names match the factors
filter_options = {"key_type": "gene_names", "search": search_mode}
motif_set = motif_set.filter(factors.genes, **filter_options)

motif_set.read_enrichment(enrichment_files, pvalue)
motif_set.write_network(targets, out, pvalue)
Example #13
0
 def test_create_empty(self):
     """preload_motifs=None must give a MotifSet with no motifs."""
     empty_set = MotifSet(preload_motifs=None)
     self.assertEqual(len(empty_set), 0, msg="motif dictionary must be empty")
     self.assertEqual(len(empty_set.get_motif_list(1.0, 0.0001)), 0)
Example #14
0
 def test_create_non_empty(self):
     """Preloading hocomoco must give a MotifSet holding at least one motif."""
     loaded_set = MotifSet(preload_motifs="hocomoco")
     n_motifs = len(loaded_set)
     self.assertGreater(n_motifs, 0, msg="motif dictionary must be non empty")
Example #15
0
from rgt.MotifSet import MotifSet
from rgt.GeneSet import GeneSet
import sys

# Motif repository files (hard-coded absolute paths).
jaspar='/home/ivan/projects/reg-gen/data/motifs/jaspar_vertebrates.mtf'
uniprobe='/home/ivan/projects/reg-gen/data/motifs/uniprobe_primary.mtf'
internal='/home/ivan/projects/reg-gen/data/motifs/internal.mtf'

motif_set = MotifSet()
motif_set.read_file([jaspar,uniprobe,internal])

# argv[1]: tab-separated file; the first column of every line is a motif name.
# The file is opened in a context manager so the handle is closed after reading.
with open(sys.argv[1]) as motif_file:
  motifs=[(l.strip("\n")).split("\t")[0] for l in motif_file]

# argv[2]: expression file loaded into the gene set
geneset_file=sys.argv[2]

# argv[3]: search mode forwarded to filter_by_genes
search_mode=sys.argv[3]

genes=GeneSet("DC Genes")
genes.read_expression(geneset_file)

filtered=motif_set.filter_by_motifs(motifs)

[filtered_genes,g_m,m_g]=filtered.filter_by_genes(genes,search=search_mode)

genes_found=[]
not_found=[]
# header row; a single parenthesised argument prints the same on Python 2 and 3
print("\t\t"+("\t".join(genes.cond)))
for m in motifs:
  try:
    sel_genes=m_g[m]
    for g in sel_genes:
Example #16
0
# argv[4]: p-value cutoff for the definition of active factors
pvalue = float(sys.argv[4])
# argv[5]: output file
out = sys.argv[5]
# argv[6] (optional): genes to be used as potential targets
filter_targets = []
if len(sys.argv) <= 6:
  targets = None
else:
  targets_file = sys.argv[6]
  targets = GeneSet("genes")
  targets.read(targets_file)


# motif databases: the file given as argv[7] if present, otherwise the
# jaspar, uniprobe and internal files
motif_set = MotifSet()
motif_files = [sys.argv[7]] if len(sys.argv) > 7 else [jaspar, uniprobe, internal]
motif_set.read_file(motif_files)

# genes acting as factors
factors = GeneSet("genes")
factors.read(factor_file)

# reading networks
#for f in glob.glob(enrichment_files): 
#  # use last dir name as name for condition
#  condition=os.path.dirname(f)
#  condition=condition.split("/")[-1]
motif_set.read_motif_targets_enrichment(enrichment_files, pvalue)
Example #17
0
 def test_create_empty(self):
     """Disabling preloading explicitly must give an empty motifs_map."""
     empty_set = MotifSet(preload_motifs=False)
     n_motifs = len(empty_set.motifs_map)
     self.assertEqual(n_motifs, 0, msg="motif dictionary must be empty")
Example #18
0
# argv[3]: search mode forwarded to the motif filter
search_mode = sys.argv[3]
# argv[4]: p-value cutoff for the definition of active factors
pvalue = float(sys.argv[4])
# argv[5]: output file
out = sys.argv[5]

# argv[6] (optional): genes to be used as potential targets
if len(sys.argv) <= 6:
    targets = None
else:
    targets_file = sys.argv[6]
    targets = GeneSet("genes")
    targets.read(targets_file)

# motif databases: preload unless a database file is given as argv[7]
preload = len(sys.argv) <= 7
motif_set = MotifSet(preload_motifs=preload)
if not preload:
    motif_set.read_mtf([sys.argv[7]])

# genes acting as factors
factors = GeneSet("genes")
factors.read(factor_file)

# keep only the motifs whose gene names match the factors
factor_genes = factors.genes
motif_set = motif_set.filter(factor_genes, key_type="gene_names", search=search_mode)

motif_set.read_enrichment(enrichment_files, pvalue)
motif_set.write_network(targets, out, pvalue)
Example #19
0
 def setUp(self):
     """Create the fixture MotifSet with the hocomoco repository preloaded."""
     # the tests rely on hocomoco being the database in use
     database = "hocomoco"
     self.motif_set = MotifSet(preload_motifs=database)
Example #20
0
class MotifSetTest(unittest.TestCase):
    def setUp(self):
        # we must enforce the use hocomoco as database
        self.motif_set = MotifSet(preload_motifs="hocomoco")

    def test_built_in_functions(self):
        ms = MotifSet(preload_motifs="hocomoco")
        self.assertTrue(str(ms).startswith("MotifSet:{"), msg="str(ms): wrong format")
        self.assertTrue(repr(ms) == str(ms), msg="MotifSet: repr does not equal str")
        ms2 = ms.filter({'name': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["H**o sapiens"]}, search="exact")
        self.assertTrue("'name': 'ALX1_HUMAN.H11MO.0.B'" in str(ms2), msg="str(ms2): wrong MotifMap")
        self.assertTrue(str(ms2).startswith("MotifSet:{"), msg="str(ms2): wrong format")
        ma = ms2.__getitem__("ALX1_HUMAN.H11MO.0.B")
        self.assertTrue("'thresholds': {1e-05: 13.4015, 0.0001: 10.3565, 0.001: 6.52, 0.0005: 7.778, 5e-05: 11.318,"
                        " 0.005: 3.1595}" in str(ma), msg="str(ma): threshold missing")
        self.assertTrue("'name': 'ALX1_HUMAN.H11MO.0.B'" in str(ma), msg="str(ma): wrong Motif")
        self.assertTrue(repr(ma) == str(ma), msg="MotifAnnotation: repr does not equal str")

    def test_create_default(self):
        ms = MotifSet()
        self.assertEqual(len(ms), 0, msg="motif dictionary must be empty by default (no preload)")
        motif_list = ms.get_motif_list(1.0, 0.0001)
        self.assertEqual(len(motif_list), 0)

    def test_create_multiple(self):
        ms = MotifSet(preload_motifs=["hocomoco", "jaspar_vertebrates"])
        self.assertEqual(len(ms), 1875, msg="motif dictionary must contain sum of motifs in files from jaspar"
                                            "_vertebrates and hocomoco")

    def test_create_empty(self):
        ms = MotifSet(preload_motifs=None)
        self.assertEqual(len(ms), 0, msg="motif dictionary must be empty")
        motif_list = ms.get_motif_list(1.0, 0.0001)
        self.assertEqual(len(motif_list), 0)

    def test_create_non_empty(self):
        ms = MotifSet(preload_motifs="hocomoco")
        self.assertGreater(len(ms), 0, msg="motif dictionary must be non empty")

    def test_filter_values_not_dict(self):
        with self.assertRaises(ValueError):
            self.motif_set.filter("test")

    def test_filter_wrong_key_type(self):
        with self.assertRaises(ValueError):
            self.motif_set.filter({'test': []})

    def test_get_mappings_wrong_key_type(self):
        ms = MotifSet()
        with self.assertRaises(ValueError):
            ms.get_mappings("test")

    def test_filter_names(self):
        ms2 = self.motif_set.filter({'name': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["H**o sapiens"]}, search="exact")
        self.assertEqual(len(ms2), 1)
        motif_list = ms2.get_motif_list(1.0, 0.0001)
        self.assertEqual(len(motif_list), 1)

        ms2 = self.motif_set.filter({'name': ["ALX1"], 'species': ["H**o sapiens"]}, search="exact")
        self.assertEqual(len(ms2), 0)
        motif_list = ms2.get_motif_list(1.0, 0.0001)
        self.assertEqual(len(motif_list), 0)

        ms2 = self.motif_set.filter({'name': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["H**o sapiens"]}, search="inexact")
        self.assertEqual(len(ms2), 1)

        ms2 = self.motif_set.filter({'name': ["ALX1"], 'species': ["H**o sapiens"]}, search="inexact")
        self.assertEqual(len(ms2), 1)

        ms2 = self.motif_set.filter({'name': ["ALX"], 'species': ["H**o sapiens"]}, search="inexact")
        self.assertEqual(len(ms2), 3)

        ms2 = self.motif_set.filter({'name': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["H**o sapiens"]}, search="regex")
        self.assertEqual(len(ms2), 1)

        ms2 = self.motif_set.filter({'name': ["ALX1.*"], 'species': ["H**o sapiens"]}, search="regex")
        self.assertEqual(len(ms2), 1)

        ms2 = self.motif_set.filter({'name': ["ALX[134]_.*"], 'species': ["H**o sapiens"]}, search="regex")
        self.assertEqual(len(ms2), 3)

    def test_filter_genes(self):
        ms2 = self.motif_set.filter({'gene_names': ["ALX1_HUMAN.H11MO.0.B"], 'species': ["H**o sapiens"]}, search="exact")
        self.assertEqual(len(ms2), 0)
        m2k, k2m = ms2.get_mappings(key_type="gene_names")
        self.assertEqual(len(m2k), 0)
        self.assertEqual(len(k2m), 0)

        ms2 = self.motif_set.filter({'gene_names': ["ALX1"], 'species': ["H**o sapiens"]}, search="exact")
        self.assertEqual(len(ms2), 1)
        m2k, k2m = ms2.get_mappings(key_type="gene_names")
        self.assertEqual(len(m2k), 1)
        self.assertEqual(len(k2m), 1)

        ms2 = self.motif_set.filter({'gene_names': ["ALX"], 'species': ["H**o sapiens"]}, search="exact")
        self.assertEqual(len(ms2), 0)
        m2k, k2m = ms2.get_mappings(key_type="gene_names")
        self.assertEqual(len(m2k), 0)
        self.assertEqual(len(k2m), 0)

        ms2 = self.motif_set.filter({'gene_names': ["ALX1"], 'species': ["H**o sapiens"]}, search="inexact")
        self.assertEqual(len(ms2), 1)
        m2k, k2m = ms2.get_mappings(key_type="gene_names")
        self.assertEqual(len(m2k), 1)
        self.assertEqual(len(k2m), 1)

        ms2 = self.motif_set.filter({'gene_names': ["ALX"], 'species': ["H**o sapiens"]}, search="inexact")
        self.assertEqual(len(ms2), 3)
        m2k, k2m = ms2.get_mappings(key_type="gene_names")
        self.assertEqual(len(m2k), 3)
        self.assertEqual(len(k2m), 3)

        ms2 = self.motif_set.filter({'gene_names': ["ALX1.*"], 'species': ["H**o sapiens"]}, search="regex")
        self.assertEqual(len(ms2), 1)
        m2k, k2m = ms2.get_mappings(key_type="gene_names")
        self.assertEqual(len(m2k), 1)
        self.assertEqual(len(k2m), 1)

        ms2 = self.motif_set.filter({'gene_names': ["ALX[134]"], 'species': ["H**o sapiens"]}, search="regex")
        self.assertEqual(len(ms2), 3)
        m2k, k2m = ms2.get_mappings(key_type="gene_names")
        self.assertEqual(len(m2k), 3)
        self.assertEqual(len(k2m), 3)

    def test_filter_family(self):
        ms2 = self.motif_set.filter({'family': ["Paired-related HD factors"], 'species': ["H**o sapiens"]}, search="exact")
        self.assertEqual(len(ms2), 35)
        m2k, k2m = ms2.get_mappings(key_type="family")
        self.assertEqual(len(m2k), 35)
        self.assertEqual(len(k2m), 1)

        ms2 = self.motif_set.filter({'family': ["factors"], 'species': ["H**o sapiens"]}, search="exact")
        self.assertEqual(len(ms2), 0)
        m2k, k2m = ms2.get_mappings(key_type="family")
        self.assertEqual(len(m2k), 0)
        self.assertEqual(len(k2m), 0)

        ms2 = self.motif_set.filter({'family': ["Paired-related HD factors"], 'species': ["H**o sapiens"]}, search="inexact")
        self.assertEqual(len(ms2), 35)
        m2k, k2m = ms2.get_mappings(key_type="family")
        self.assertEqual(len(m2k), 35)
        self.assertEqual(len(k2m), 1)

        ms2 = self.motif_set.filter({'family': ["Paired-related HD"], 'species': ["H**o sapiens"]}, search="inexact")
        self.assertEqual(len(ms2), 35)
        m2k, k2m = ms2.get_mappings(key_type="family")
        self.assertEqual(len(m2k), 35)
        self.assertEqual(len(k2m), 1)

        ms2 = self.motif_set.filter({'family': ["factors"], 'species': ["H**o sapiens"]}, search="inexact")
        self.assertEqual(len(ms2), 673)
        m2k, k2m = ms2.get_mappings(key_type="family")
        self.assertEqual(len(m2k), 673)
        self.assertEqual(len(k2m), 59)

        ms2 = self.motif_set.filter({'family': ["Paired.*factors"], 'species': ["H**o sapiens"]}, search="regex")
        self.assertEqual(len(ms2), 35)
        m2k, k2m = ms2.get_mappings(key_type="family")
        self.assertEqual(len(m2k), 35)
        self.assertEqual(len(k2m), 1)

        ms2 = self.motif_set.filter({'family': ["Paired-related.*"], 'species': ["H**o sapiens"]}, search="regex")
        self.assertEqual(len(ms2), 35)
        m2k, k2m = ms2.get_mappings(key_type="family")
        self.assertEqual(len(m2k), 35)
        self.assertEqual(len(k2m), 1)

        ms2 = self.motif_set.filter({'family': [".*factors"], 'species': ["H**o sapiens"]}, search="regex")
        self.assertEqual(len(ms2), 673)
        m2k, k2m = ms2.get_mappings(key_type="family")
        self.assertEqual(len(m2k), 673)
        self.assertEqual(len(k2m), 59)

    def test_filter_uniprot(self):
        # Filter on 'uniprot_ids' (restricted to one species) with every search mode and
        # verify the size of the filtered set and of both mappings returned by get_mappings.

        # the species name is written out in full so that the same literal is usable
        # as an exact key, an inexact key and a regex pattern
        human = "Homo sapiens"

        def check(uniprot_id, mode, n_motifs, n_keys):
            # n_motifs: expected number of motifs kept; n_keys: expected number of distinct keys
            ms2 = self.motif_set.filter({'uniprot_ids': [uniprot_id], 'species': [human]}, search=mode)
            self.assertEqual(len(ms2), n_motifs)
            m2k, k2m = ms2.get_mappings(key_type="uniprot_ids")
            self.assertEqual(len(m2k), n_motifs)
            self.assertEqual(len(k2m), n_keys)

        # exact: a partial accession must not match anything
        check("Q9H3D4", "exact", 2, 1)
        check("Q9H", "exact", 0, 0)

        # inexact: a partial accession matches several entries
        check("Q9H3D4", "inexact", 2, 1)
        check("Q9H", "inexact", 20, 16)

        # regex
        check("Q9H3D4", "regex", 2, 1)
        check("Q9H.*", "regex", 20, 16)

    def test_filter_data_source(self):
        # implicitly, we are also testing the case insensitiveness of the string matching of all three types

        # species names are written out in full so that the same literals are usable
        # as exact keys, inexact keys and regex patterns
        human = "Homo sapiens"
        mouse = "Mus musculus"

        def check(source, species, mode, n_motifs, n_keys):
            # n_motifs: expected number of motifs kept; n_keys: expected number of distinct data sources
            ms2 = self.motif_set.filter({'data_source': [source], 'species': species}, search=mode)
            self.assertEqual(len(ms2), n_motifs)
            m2k, k2m = ms2.get_mappings(key_type="data_source")
            self.assertEqual(len(m2k), n_motifs)
            self.assertEqual(len(k2m), n_keys)

        # exact: the full data source name is required, a prefix yields nothing
        check("chip-seq", [human], "exact", 431, 1)
        check("chip", [human, mouse], "exact", 0, 0)

        # inexact: full name and prefix select the same motifs
        check("chip-seq", [human], "inexact", 431, 1)
        check("chip", [human], "inexact", 431, 1)

        # regex: plain name and alternation
        check("chip-seq", [human], "regex", 431, 1)
        check("(chip|selex)", [human], "regex", 588, 2)

    def test_filter_database(self):
        # Filter on 'database' with every search mode. The expected counts are either the
        # full 1296 motifs (one database key) or an empty result (no keys at all).

        def check(database, mode, n_motifs, n_keys):
            subset = self.motif_set.filter({'database': [database]}, search=mode)
            self.assertEqual(len(subset), n_motifs)
            motif_to_key, key_to_motif = subset.get_mappings(key_type="database")
            self.assertEqual(len(motif_to_key), n_motifs)
            self.assertEqual(len(key_to_motif), n_keys)

        # exact
        check("hocomoco", "exact", 1296, 1)
        check("jaspar_vertebrates", "exact", 0, 0)

        # inexact
        check("jaspar_vertebrates", "inexact", 0, 0)
        check("uniprobe", "inexact", 0, 0)

        # regex
        check("jaspar", "regex", 0, 0)
        check("(hocomoco|jaspar)", "regex", 1296, 1)

    def test_filter(self):
        # test different combinations of key_types and keys

        nr3 = "Steroid hormone receptors (NR3)"

        def check(values, mode, key_type, n_motifs, n_keys):
            # Filters the fixture, verifies the set size and the sizes of both mappings for
            # key_type, and hands the filtered set back for any additional assertions.
            subset = self.motif_set.filter(values, search=mode)
            self.assertEqual(len(subset), n_motifs)
            m2k, k2m = subset.get_mappings(key_type=key_type)
            self.assertEqual(len(m2k), n_motifs)
            self.assertEqual(len(k2m), n_keys)
            return subset

        # exact search
        check({'data_source': ["chip-seq", "integrative"]}, "exact", "data_source", 1138, 2)

        check({'data_source': ["chip-seq", "integrative"], 'family': [nr3], 'species': ["Mus musculus"]},
              "exact", "family", 14, 1)

        check({'data_source': ["chip-seq"], 'family': [nr3], 'tax_group': ["vertebrates", "plants"]},
              "exact", "tax_group", 25, 1)

        # both species are expected as keys of the mapping, so each name has to be
        # spelled exactly as a real species name
        check({'data_source': ["chip-seq"], 'family': [nr3], 'species': ["Mus musculus", "Homo sapiens"]},
              "exact", "species", 25, 2)

        # inexact search: substrings of the values used above
        check({'data_source': ["chip"], 'family': ["NR3"], 'tax_group': ["brates"]},
              "inexact", "tax_group", 25, 1)

        # regex search; the filtered set is additionally turned into a motif list
        related = check({'family': [".*related factors"]}, "regex", "family", 587, 36)
        motif_list = related.get_motif_list(1.0, 0.0001)
        self.assertEqual(len(motif_list), 587)

        check({'data_source': ["(chip|integr)"], 'family': ["multiple"], 'species': ["(musculus|sapiens)"]},
              "regex", "family", 57, 1)

    def test_filter_with_empty_dict(self):
        # a filter without any criteria is expected to keep all 1296 motifs
        no_criteria = {}
        unfiltered = self.motif_set.filter(no_criteria, search="exact")
        self.assertEqual(len(unfiltered), 1296)

    def test_create_motif_list(self):
        pitx = self.motif_set.filter({'name': ["PITX"]}, search="inexact")  # 5 Motifs
        # remember the stored threshold before it gets wiped below
        expected = pitx["PITX2_HUMAN.H11MO.0.D"].thresholds[0.0001]

        # first pass: blank out every pre-calculated threshold so that get_motif_list
        # cannot simply read the stored value
        for annotation in pitx:
            for fpr in [0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001]:
                annotation.thresholds[fpr] = []
        motifs = pitx.get_motif_list(1.0, 0.0001)
        self.assertEqual(len(motifs), len(pitx))
        # the freshly obtained threshold must equal the one that was stored originally
        self.assertEqual(motifs[2].threshold, expected, msg="create_motif_list calculates threshold incorrectly")

        # second pass: drop the thresholds dict altogether and check again
        for annotation in pitx:
            annotation.thresholds = {}
        motifs = pitx.get_motif_list(1.0, 0.0001)
        self.assertEqual(motifs[2].threshold, expected, msg="create_motif_list doesn't work for empty thresholds")
        self.assertEqual(len(motifs), len(pitx))
import sys

from rgt.GeneSet import GeneSet
from rgt.MotifSet import MotifSet

# Command line: argv[1] = tab-separated file whose first column holds motif names,
#               argv[2] = gene expression file, argv[3] = search mode passed to filter().
# The file is read inside a "with" block so the handle is closed right after use.
with open(sys.argv[1]) as motif_file:
    motifs = [line.strip("\n").split("\t")[0] for line in motif_file]
geneset_file = sys.argv[2]
search_mode = sys.argv[3]

# preload all available motifs from the repositories
motif_set = MotifSet(preload_motifs=True)

genes = GeneSet("DC Genes")
genes.read_expression(geneset_file)

# take only a subset of the motifs (using their exact names)
motif_set, _, _ = motif_set.filter(motifs, key_type="name")

# of this new motif set, take the subset of those matching these gene names
# (we only care about the motif2gene mapping)
_, m_g, _ = motif_set.filter(genes.genes,
                             key_type="gene_names",
                             search=search_mode)

genes_found = []
not_found = []
# header row: two empty leading columns followed by the entries of genes.cond
print("\t\t" + ("\t".join(genes.cond)))
for m in motifs:
    try:
        sel_genes = m_g[m]
        for g in sel_genes:
Example #22
0
 def test_create_non_empty(self):
     # a MotifSet built with preload_motifs=True must end up with at least one motif
     preloaded = MotifSet(preload_motifs=True)
     n_motifs = len(preloaded.motifs_map)
     self.assertGreater(n_motifs, 0, msg="motif dictionary must be non empty")
Example #23
0
class CustomDBTest(unittest.TestCase):
    # Tests MotifSet when it is loaded from the "TestCustomDB" directory located next to this file.

    def setUp(self):
        # use CustomDB
        custom_db = os.path.join(os.path.dirname(__file__), "TestCustomDB")
        self.motif_set = MotifSet(preload_motifs=[custom_db], motif_dbs=True)

    def test_loading(self):
        # three motifs are expected; annotation fields that are checked here must be unset
        self.assertEqual(len(self.motif_set.motifs_map), 3, msg="loaded wrong number of motifs")
        first = self.motif_set.motifs_map["firstMotif_5.0.B"]
        self.assertIsNone(first.gene_names, msg="gene_names not None")
        second = self.motif_set.motifs_map["secondMotif_5.0.B"]
        self.assertIsNone(second.data_source, msg="data_source not None")
        third = self.motif_set.motifs_map["thirdMotif_5.0.B"]
        self.assertEqual(len(third.thresholds), 0, msg="thresholds is not an empty dict")

    def test_built_in_functions(self):
        # str/repr of the full set
        full_set = self.motif_set
        self.assertTrue(str(full_set).startswith("MotifSet:{"), msg="str(ms): wrong format")
        self.assertTrue(repr(full_set) == str(full_set), msg="MotifSet: repr does not equal str")

        # str of a filtered set
        name_fragment = "'name': 'firstMotif_5.0.B'"
        ms2 = self.motif_set.filter({'name': ['firstMotif_5.0.B']}, search="exact")
        self.assertTrue(name_fragment in str(ms2), msg="str(ms2): wrong MotifMap")
        self.assertTrue(str(ms2).startswith("MotifSet:{"), msg="str(ms2): wrong format")

        # str/repr of a single entry looked up by name
        ma = ms2["firstMotif_5.0.B"]
        self.assertTrue(name_fragment in str(ma), msg="str(ma): wrong Motif")
        self.assertTrue(repr(ma) == str(ma), msg="MotifAnnotation: repr does not equal str")

    def test_filter_values_not_dict(self):
        # filter() must reject a value that is not a dict
        self.assertRaises(ValueError, self.motif_set.filter, "test")

    def test_filter_wrong_key_type(self):
        # filter() must reject an unknown key type
        self.assertRaises(ValueError, self.motif_set.filter, {'test': []})

    def test_get_mappings_wrong_key_type(self):
        # get_mappings() must reject an unknown key type
        ms = MotifSet()
        self.assertRaises(ValueError, ms.get_mappings, "test")

    def test_filter_names(self):
        def check_single_hit(name, mode):
            # exactly one motif must be selected and must make it into the motif list
            hits = self.motif_set.filter({'name': [name]}, search=mode)
            self.assertEqual(len(hits), 1)
            motif_list = hits.get_motif_list(1.0, 0.0001)
            self.assertEqual(len(motif_list), 1)

        check_single_hit("firstMotif_5.0.B", "exact")
        check_single_hit("secondMotif_5.0.B", "inexact")
        check_single_hit("thirdMotif_5.0.B", "regex")

    def test_filter_database(self):
        def check_count(database, mode, expected):
            hits = self.motif_set.filter({'database': [database]}, search=mode)
            self.assertEqual(len(hits), expected)

        # a database name that was not loaded selects nothing
        check_count("hocomoco", "exact", 0)

        # the custom database selects all three motifs in every search mode
        check_count("TestCustomDB", "exact", 3)
        check_count("TestCustomDB", "inexact", 3)
        check_count("TestCustomDB", "regex", 3)

    def test_filter_with_empty_dict(self):
        # a filter without any criteria is expected to keep all three motifs
        unfiltered = self.motif_set.filter({}, search="exact")
        self.assertEqual(len(unfiltered), 3)
Example #24
0
# argv[3]: search mode
search_mode = sys.argv[3]
# argv[4]: p-value cutoff for the definition of active factors
pvalue = float(sys.argv[4])
# argv[5]: output file
out = sys.argv[5]
# genes to be used as potential targets (optional argv[6]); targets stays None when not given
filter_targets = []
targets = None
if len(sys.argv) > 6:
    targets_file = sys.argv[6]
    # reading targets
    targets = GeneSet("genes")
    targets.read(targets_file)

# starting motif databases: a single file from the optional argv[7],
# otherwise the jaspar, uniprobe and internal files (defined earlier in the script)
motif_set = MotifSet()
if len(sys.argv) > 7:
    motif_set.read_file([sys.argv[7]])
else:
    motif_set.read_file([jaspar, uniprobe, internal])

# reading genes (factor_file is defined earlier in the script)
factors = GeneSet("genes")
factors.read(factor_file)

# reading networks
# (disabled: per-file loop that derived a condition name from the last directory name)
#for f in glob.glob(enrichment_files):
#  # use last dir name as name for condition
#  condition=os.path.dirname(f)
#  condition=condition.split("/")[-1]
motif_set.read_motif_targets_enrichment(enrichment_files, pvalue)
Example #25
0
 def setUp(self):
     # use CustomDB: the "TestCustomDB" directory located next to this file
     custom_db_dir = os.path.join(os.path.dirname(__file__), "TestCustomDB")
     self.motif_set = MotifSet(preload_motifs=[custom_db_dir], motif_dbs=True)