Exemple #1
0
    def test_create_default(self):
        """A MotifSet built without arguments must start with an empty motif map."""
        default_set = MotifSet()
        motif_count = len(default_set.motifs_map)
        self.assertEqual(motif_count, 0, msg="motif dictionary must be empty by default (no preload)")
Exemple #2
0
    def test_match_multiple(self):
        """Match the CTCF motif against chr1:0-5022 and check the two reported hits."""
        ctcf_query = {'database': ["jaspar_vertebrates"], 'name': ["MA0139.1.CTCF"]}
        ms = MotifSet(preload_motifs="default").filter(ctcf_query, search="inexact")

        # the query is expected to single out exactly one motif
        self.assertEqual(len(ms), 1)

        motif = ms.get_motif_list(1, 0.0001)[0]

        scanner = scan.Scanner(7)

        # Two scanner entries for the same motif: its pssm and its pssm_rc
        # (presumably the reverse-complement matrix - confirm against the
        # motif class), each paired with the same threshold.
        thresholds = [motif.threshold, motif.threshold]
        pssm_list = [motif.pssm, motif.pssm_rc]
        scanner.set_motifs(pssm_list, tools.flat_bg(4), thresholds)

        region = GenomicRegion("chr1", 0, 5022)

        # sequence covered by the region, converted to a plain string
        sequence = str(self.genome_file.fetch(region.chrom, region.initial, region.final))

        hits = match_multiple(scanner, [motif], sequence, region)

        expected_hits = [
            GenomicRegion("chr1", 4270, 4289, name="MA0139.1.CTCF", orientation="+"),
            GenomicRegion("chr1", 4180, 4199, name="MA0139.1.CTCF", orientation="-"),
        ]
        self.assertSequenceEqual(hits.sequences, expected_hits)
Exemple #3
0
    def setUp(self):
        """Create an empty MotifSet and fill it from the hocomoco .mtf file only."""
        # no preloading: hocomoco must be the only database in use
        self.motif_set = MotifSet(preload_motifs=False)
        self.motif_set.read_mtf(
            os.path.join(os.path.dirname(__file__), "../data/motifs/hocomoco.mtf"))
 def test_built_in_functions(self):
     """Check the str/repr output of a MotifSet and of a single motif entry.

     The set is preloaded from "hocomoco", narrowed down to the motif
     ALX1_HUMAN.H11MO.0.B with an exact-match filter, and the textual
     representations of the full set, the filtered set and the selected
     entry are inspected.
     """
     ms = MotifSet(preload_motifs="hocomoco")
     self.assertTrue(str(ms).startswith("MotifSet:{"),
                     msg="str(ms): wrong format")
     self.assertEqual(repr(ms), str(ms),
                      msg="MotifSet: repr does not equal str")

     # The species value must be spelled out in full: with search="exact"
     # a mangled name would not select the motif.
     ms2 = ms.filter(
         {
             'name': ["ALX1_HUMAN.H11MO.0.B"],
             'species': ["Homo sapiens"]
         },
         search="exact")
     self.assertIn("'name': 'ALX1_HUMAN.H11MO.0.B'", str(ms2),
                   msg="str(ms2): wrong MotifMap")
     self.assertTrue(str(ms2).startswith("MotifSet:{"),
                     msg="str(ms2): wrong format")

     # look up the single remaining entry by its motif name
     ma = ms2["ALX1_HUMAN.H11MO.0.B"]
     self.assertIn(
         "'thresholds': {0.005: 3.1595, 0.001: 6.52, 0.0005: 7.778, 0.0001: 10.3565, "
         "5e-05: 11.318, 1e-05: 13.4015}", str(ma),
         msg="str(ma): threshold missing")
     self.assertIn("'name': 'ALX1_HUMAN.H11MO.0.B'", str(ma),
                   msg="str(ma): wrong Motif")
     self.assertEqual(repr(ma), str(ma),
                      msg="MotifAnnotation: repr does not equal str")
 def test_create_multiple(self):
     """Preloading two databases must give the combined number of motifs (2042)."""
     databases = ["hocomoco", "jaspar_vertebrates"]
     combined = MotifSet(preload_motifs=databases)
     failure_note = ("motif dictionary must contain sum of motifs in files from jaspar"
                     "_vertebrates and hocomoco")
     self.assertEqual(len(combined), 2042, msg=failure_note)
 def test_create_default(self):
     """A default MotifSet has length zero and yields an empty motif list."""
     default_set = MotifSet()
     self.assertEqual(len(default_set), 0,
                      msg="motif dictionary must be empty by default (no preload)")
     # an empty set must also produce no motifs when asked for its list
     self.assertEqual(len(default_set.get_motif_list(1.0, 0.0001)), 0)
Exemple #7
0
search_mode = sys.argv[3]
# p-value cutoff used to define the active factors
pvalue = float(sys.argv[4])
# output file
out = sys.argv[5]

# optional argument 6: file with the genes to use as potential targets
filter_targets = []
targets = None
if len(sys.argv) > 6:
    targets_file = sys.argv[6]
    targets = GeneSet("genes")
    targets.read(targets_file)

# motif databases: the single file given as argument 7 when present,
# otherwise the jaspar, uniprobe and internal files
motif_set = MotifSet()
motif_set.read_file(
    [sys.argv[7]] if len(sys.argv) > 7 else [jaspar, uniprobe, internal])

# genes acting as factors
factors = GeneSet("genes")
factors.read(factor_file)

# reading networks
#for f in glob.glob(enrichment_files):
#  # use last dir name as name for condition
#  condition=os.path.dirname(f)
#  condition=condition.split("/")[-1]
motif_set.read_motif_targets_enrichment(enrichment_files, pvalue)
Exemple #8
0
 def test_create_non_empty(self):
     """Preloading with ``True`` must leave at least one entry in the motif map."""
     loaded = MotifSet(preload_motifs=True)
     n_motifs = len(loaded.motifs_map)
     self.assertGreater(n_motifs, 0, msg="motif dictionary must be non empty")
Exemple #9
0
 def test_create_empty(self):
     """Preloading with ``False`` must leave the motif map empty."""
     unloaded = MotifSet(preload_motifs=False)
     n_motifs = len(unloaded.motifs_map)
     self.assertEqual(n_motifs, 0, msg="motif dictionary must be empty")
 def test_get_mappings_wrong_key_type(self):
     """``get_mappings`` must raise ValueError when called with the key type "test"."""
     empty_set = MotifSet()
     self.assertRaises(ValueError, empty_set.get_mappings, "test")
 def test_create_non_empty(self):
     """Preloading "hocomoco" must produce a MotifSet with at least one motif."""
     hocomoco_set = MotifSet(preload_motifs="hocomoco")
     motif_count = len(hocomoco_set)
     self.assertGreater(motif_count, 0, msg="motif dictionary must be non empty")
 def test_create_empty(self):
     """With ``preload_motifs=None`` the set has length zero and lists no motifs."""
     empty_set = MotifSet(preload_motifs=None)
     self.assertEqual(len(empty_set), 0, msg="motif dictionary must be empty")
     # the motif list derived from an empty set must be empty as well
     self.assertEqual(len(empty_set.get_motif_list(1.0, 0.0001)), 0)
 def setUp(self):
     """Build the MotifSet from the ``TestCustomDB`` entry located beside this file."""
     test_dir = os.path.dirname(__file__)
     custom_db = os.path.join(test_dir, "TestCustomDB")
     # use CustomDB as the only preloaded source
     self.motif_set = MotifSet(preload_motifs=[custom_db], motif_dbs=True)
 def setUp(self):
     """Give each test a MotifSet preloaded from "hocomoco" and nothing else."""
     # hocomoco must be the database in use for these tests
     hocomoco_only = MotifSet(preload_motifs="hocomoco")
     self.motif_set = hocomoco_only
import sys

from rgt.GeneSet import GeneSet
from rgt.MotifSet import MotifSet

# Motif names: the first tab-separated column of every line in the file
# given as argument 1. The file is opened in a `with` block so the handle
# is closed as soon as the names are read.
with open(sys.argv[1]) as motif_file:
    motifs = [line.strip("\n").split("\t")[0] for line in motif_file]
geneset_file = sys.argv[2]
search_mode = sys.argv[3]

# preload all available motifs from the repositories
motif_set = MotifSet(preload_motifs=True)

genes = GeneSet("DC Genes")
genes.read_expression(geneset_file)

# take only a subset of the motifs (using their exact names)
motif_set, _, _ = motif_set.filter(motifs, key_type="name")

# of this new motif set, take the subset of those matching these gene names
# (we only care about the motif2gene mapping)
_, m_g, _ = motif_set.filter(genes.genes,
                             key_type="gene_names",
                             search=search_mode)

genes_found = []
not_found = []
# header row: two leading tabs, then the tab-separated entries of genes.cond
print("\t\t" + ("\t".join(genes.cond)))
for m in motifs:
    try:
        sel_genes = m_g[m]
        for g in sel_genes: