def test_create_default(self):
    """Check that a MotifSet built without arguments has an empty motifs_map."""
    motif_set = MotifSet()
    loaded = len(motif_set.motifs_map)
    self.assertEqual(
        loaded, 0,
        msg="motif dictionary must be empty by default (no preload)")
def test_match_multiple(self):
    """Check the hits match_multiple reports for MA0139.1.CTCF on chr1:0-5022.

    The scanner is given the motif's ``pssm`` and ``pssm_rc`` matrices, both
    with the motif's own threshold, and the test expects exactly one "+" hit
    and one "-" hit.
    """
    selection = {'database': ["jaspar_vertebrates"], 'name': ["MA0139.1.CTCF"]}
    ms = MotifSet(preload_motifs="default").filter(selection, search="inexact")
    self.assertEqual(len(ms), 1)

    motif = ms.get_motif_list(1, 0.0001)[0]

    # Two matrices are registered, so the same threshold is listed twice.
    scanner = scan.Scanner(7)
    thresholds = [motif.threshold, motif.threshold]
    pssm_list = [motif.pssm, motif.pssm_rc]
    scanner.set_motifs(pssm_list, tools.flat_bg(4), thresholds)

    # Fetch the sequence covered by the region under test.
    genomic_region = GenomicRegion("chr1", 0, 5022)
    sequence = str(
        self.genome_file.fetch(
            genomic_region.chrom,
            genomic_region.initial,
            genomic_region.final,
        )
    )

    grs = match_multiple(scanner, [motif], sequence, genomic_region)

    expected_hits = [
        GenomicRegion("chr1", 4270, 4289, name="MA0139.1.CTCF", orientation="+"),
        GenomicRegion("chr1", 4180, 4199, name="MA0139.1.CTCF", orientation="-"),
    ]
    self.assertSequenceEqual(grs.sequences, expected_hits)
def setUp(self):
    """Build the fixture MotifSet from the hocomoco.mtf file only.

    Nothing is preloaded; the single .mtf file is resolved relative to this
    test module so that hocomoco is the only database in use.
    """
    here = os.path.dirname(__file__)
    self.motif_set = MotifSet(preload_motifs=False)
    self.motif_set.read_mtf(os.path.join(here, "../data/motifs/hocomoco.mtf"))
def test_built_in_functions(self):
    """Check str(), repr() and item lookup on a MotifSet and on one of its entries.

    Covers the full hocomoco set, a filtered set containing
    ALX1_HUMAN.H11MO.0.B, and the entry returned for that name.
    """
    ms = MotifSet(preload_motifs="hocomoco")
    self.assertTrue(str(ms).startswith("MotifSet:{"), msg="str(ms): wrong format")
    self.assertEqual(repr(ms), str(ms), msg="MotifSet: repr does not equal str")

    # The search is exact, so the species value has to be the real species
    # name spelled out in full; anything else would filter every motif away.
    ms2 = ms.filter(
        {
            'name': ["ALX1_HUMAN.H11MO.0.B"],
            'species': ["Homo sapiens"],
        },
        search="exact")
    self.assertIn("'name': 'ALX1_HUMAN.H11MO.0.B'", str(ms2),
                  msg="str(ms2): wrong MotifMap")
    self.assertTrue(str(ms2).startswith("MotifSet:{"), msg="str(ms2): wrong format")

    ma = ms2["ALX1_HUMAN.H11MO.0.B"]
    self.assertIn(
        "'thresholds': {0.005: 3.1595, 0.001: 6.52, 0.0005: 7.778, 0.0001: 10.3565, "
        "5e-05: 11.318, 1e-05: 13.4015}",
        str(ma),
        msg="str(ma): threshold missing")
    self.assertIn("'name': 'ALX1_HUMAN.H11MO.0.B'", str(ma),
                  msg="str(ma): wrong Motif")
    self.assertEqual(repr(ma), str(ma),
                     msg="MotifAnnotation: repr does not equal str")
def test_create_multiple(self):
    """Check that preloading hocomoco and jaspar_vertebrates gives 2042 motifs."""
    databases = ["hocomoco", "jaspar_vertebrates"]
    ms = MotifSet(preload_motifs=databases)
    motif_count = len(ms)
    self.assertEqual(
        motif_count, 2042,
        msg="motif dictionary must contain sum of motifs in files from jaspar"
            "_vertebrates and hocomoco")
def test_create_default(self):
    """Check that a MotifSet built without arguments is empty.

    Both its length and the list returned by get_motif_list must be zero-sized.
    """
    motif_set = MotifSet()
    self.assertEqual(
        len(motif_set), 0,
        msg="motif dictionary must be empty by default (no preload)")

    motifs = motif_set.get_motif_list(1.0, 0.0001)
    self.assertEqual(len(motifs), 0)
search_mode = sys.argv[3]
# p-value cutoff used to define active factors
pvalue = float(sys.argv[4])
# output file
out = sys.argv[5]

# Optional sixth argument: file with the genes to use as potential targets.
filter_targets = []
targets = None
if len(sys.argv) >= 7:
    targets_file = sys.argv[6]
    targets = GeneSet("genes")
    targets.read(targets_file)

# Motif databases: the default repositories unless a single file is supplied
# as the seventh argument.
motif_set = MotifSet()
if len(sys.argv) <= 7:
    motif_set.read_file([jaspar, uniprobe, internal])
else:
    motif_set.read_file([sys.argv[7]])

# Gene set holding the factors.
factors = GeneSet("genes")
factors.read(factor_file)

# Networks. NOTE: a former per-file loop (glob over enrichment_files, taking
# the last directory name as the condition name) is disabled; the enrichment
# files are passed straight to the motif set instead.
motif_set.read_motif_targets_enrichment(enrichment_files, pvalue)
def test_create_non_empty(self):
    """Check that preload_motifs=True leaves at least one entry in motifs_map."""
    motif_set = MotifSet(preload_motifs=True)
    loaded = len(motif_set.motifs_map)
    self.assertGreater(loaded, 0, msg="motif dictionary must be non empty")
def test_create_empty(self):
    """Check that preload_motifs=False leaves motifs_map empty."""
    motif_set = MotifSet(preload_motifs=False)
    loaded = len(motif_set.motifs_map)
    self.assertEqual(loaded, 0, msg="motif dictionary must be empty")
def test_get_mappings_wrong_key_type(self):
    """Check that get_mappings("test") raises ValueError on an empty MotifSet."""
    motif_set = MotifSet()
    # "test" is presumably not an accepted key type; the call must be rejected.
    self.assertRaises(ValueError, motif_set.get_mappings, "test")
def test_create_non_empty(self):
    """Check that preloading hocomoco yields a MotifSet with at least one motif."""
    motif_set = MotifSet(preload_motifs="hocomoco")
    motif_count = len(motif_set)
    self.assertGreater(motif_count, 0, msg="motif dictionary must be non empty")
def test_create_empty(self):
    """Check that preload_motifs=None gives an empty MotifSet.

    Both its length and the list returned by get_motif_list must be zero-sized.
    """
    motif_set = MotifSet(preload_motifs=None)
    self.assertEqual(len(motif_set), 0, msg="motif dictionary must be empty")

    motifs = motif_set.get_motif_list(1.0, 0.0001)
    self.assertEqual(len(motifs), 0)
def setUp(self):
    """Build the fixture MotifSet from the TestCustomDB path beside this module."""
    # Custom database location, resolved relative to this test file.
    custom_db = os.path.join(os.path.dirname(__file__), "TestCustomDB")
    # motif_dbs=True presumably marks the entries as database paths -- TODO confirm.
    self.motif_set = MotifSet(preload_motifs=[custom_db], motif_dbs=True)
def setUp(self):
    """Build the fixture MotifSet preloaded from hocomoco only."""
    # The tests rely on hocomoco being the database in use.
    database = "hocomoco"
    self.motif_set = MotifSet(preload_motifs=database)
import sys from rgt.GeneSet import GeneSet from rgt.MotifSet import MotifSet motifs = [(l.strip("\n")).split("\t")[0] for l in open(sys.argv[1])] geneset_file = sys.argv[2] search_mode = sys.argv[3] # preload all available motifs from the repositories motif_set = MotifSet(preload_motifs=True) genes = GeneSet("DC Genes") genes.read_expression(geneset_file) # take only a subset of the motifs (using their exact names) motif_set, _, _ = motif_set.filter(motifs, key_type="name") # of these new motif set, take the subset of those matching these gene names # (we only care about the motif2gene mapping) _, m_g, _ = motif_set.filter(genes.genes, key_type="gene_names", search=search_mode) genes_found = [] not_found = [] print("\t\t" + ("\t".join(genes.cond))) for m in motifs: try: sel_genes = m_g[m] for g in sel_genes: