def test_large_sizes(self):
    """Run shuffle on a high-coverage input and check the result total.

    An earlier, larger case -- peaks.shuffle(1000, 5, 0, .05, [48] * 5000) --
    failed on extreme coverage and is currently disabled; a different
    example is used here instead.
    """
    n_iterations = 5
    read_lengths = [48] * 2003
    counts = shuffle(136, n_iterations, 0, .05, read_lengths)
    # The returned values are expected to add up to the iteration count.
    self.assertEqual(sum(counts), n_iterations)
def test_shuffle(self):
    """Check shuffle's handling of invalid inputs and its basic results.

    Each invalid-input case is unpacked with ``*`` so that shuffle really
    receives five separate arguments.  Handing assertRaises the tuple
    itself would call shuffle with one single argument, which fails on
    the argument count alone and never exercises the values under test.
    """
    invalid_calls = [
        # Case: fail on null inputs
        (None, 1, 0, .05, [2, 3, 4]),
        (1, None, 0, .05, [2, 3, 4]),
        (1, 1, None, .05, [2, 3, 4]),
        (1, 1, 0, None, [2, 3, 4]),
        (1, 1, 0, .05, None),
        # Case: fail on an empty list of reads
        (1, 1, 0, .05, []),
        # Case: fail on zero for either length or number of iterations
        (0, 1, 0, .05, [2, 3, 4]),
        (1, 0, 0, .05, [2, 3, 4]),
    ]
    for args in invalid_calls:
        self.assertRaises(TypeError, shuffle, *args)

    # Case: succeed and check results
    # (still need to figure out how to lock down random for testing)
    result = shuffle(100, 3, 0, .05, [5] * 50)
    self.assertEqual(sum(result), 3)

    # Case: reads longer than gene
    self.assertEqual([0] * 100, shuffle(1, 1, 0, .05, [2, 3, 4]))
def test_shuffle(self):
    """Check shuffle's handling of invalid inputs and its basic results.

    Each invalid-input case is unpacked with ``*`` so that shuffle really
    receives five separate arguments.  Handing assertRaises the tuple
    itself would call shuffle with one single argument, which fails on
    the argument count alone and never exercises the values under test.
    """
    invalid_calls = [
        # Case: fail on null inputs
        (None, 1, 0, .05, [2, 3, 4]),
        (1, None, 0, .05, [2, 3, 4]),
        (1, 1, None, .05, [2, 3, 4]),
        (1, 1, 0, None, [2, 3, 4]),
        (1, 1, 0, .05, None),
        # Case: fail on an empty list of reads
        (1, 1, 0, .05, []),
        # Case: fail on zero for either length or number of iterations
        (0, 1, 0, .05, [2, 3, 4]),
        (1, 0, 0, .05, [2, 3, 4]),
    ]
    for args in invalid_calls:
        self.assertRaises(TypeError, shuffle, *args)

    # Case: succeed and check results
    # (still need to figure out how to lock down random for testing)
    result = shuffle(100, 3, 0, .05, [5] * 50)
    self.assertEqual(sum(result), 3)

    # Case: reads longer than gene
    self.assertEqual([0] * 100, shuffle(1, 1, 0, .05, [2, 3, 4]))
def get_FDR_cutoff_mean(readlengths, genelength, iterations=100, mincut=2, alpha=0.05):
    """
    Find randomized method, as in FOX2ES NSMB paper.
    MEAN, not MODE
    scatter reads, calculate number of reads to pass fdr threshold,
    takes average observed cutoff

    readlengths -- list of lengths of aligned portions of reads
    genelength -- effective gene length (unalignable regions aren't counted)
    iterations -- number of times to repeat FDR thresholding calculation
    mincut -- min threshold possible to return
    alpha -- FDR alpha, forwarded to shuffle

    Returns an int, the number of reads needed to meet the FDR cutoff.
    With fewer than 20 reads, mincut is returned unchanged and no
    shuffling is done.

    TODO: Allow the minimum cutoff to be parameterized
    TODO: double check math on this
    """

    # if you have very few reads on a gene, don't waste time
    # trying to find a cutoff
    if len(readlengths) < 20:
        return mincut

    # Use the same integer iteration count for the shuffle call and for the
    # mean below, so the divisor matches the number of shuffles requested.
    n_iterations = int(iterations)

    # Forward the caller's alpha instead of a hard-coded .05.
    results = shuffle(int(genelength), n_iterations, 0, alpha, readlengths)

    # parses results from peaks script, calculates mean from peaks results
    # NOTE(review): results is presumably indexed by cutoff, holding how many
    # iterations produced that cutoff -- confirm against the shuffle docs.
    total = sum(cut * n_observed for cut, n_observed in enumerate(results))

    # Force float division: on Python 2 int / int floors, which would make
    # the rounding below a no-op and bias the mean downwards.
    cutoff = float(total) / n_iterations

    # logic for min cutoffs
    if cutoff < mincut:
        cutoff = mincut
    return int(round(cutoff, 0))
def get_FDR_cutoff_mean(readlengths, genelength, iterations=100, mincut=2, alpha=0.05):
    """
    Returns an int, the number of reads needed to meet the FDR cutoff by randomized method

    The cutoff is the mean over all shuffle iterations, rounded to the
    nearest int and never lower than mincut.  With fewer than 20 reads,
    mincut is returned unchanged and no shuffling is done.

    TODO: Allow the minimum cutoff to be parameterized
    TODO: double check math on this

    :param readlengths: list, list of lengths of aligned portions of reads
    :param genelength: int, effective gene length (unalignable regions aren't counted)
    :param iterations: int, default 100, number of shuffle iterations
    :param mincut: int, default 2, min threshold possible to return
    :param alpha: float, default 0.05, FDR alpha, forwarded to shuffle
    :return: int, min number of reads per position to reach FDR
    """

    # if you have very few reads on a gene, don't waste time
    # trying to find a cutoff
    if len(readlengths) < 20:
        return mincut

    # Use the same integer iteration count for the shuffle call and for the
    # mean below, so the divisor matches the number of shuffles requested.
    n_iterations = int(iterations)

    # Forward the caller's alpha instead of a hard-coded .05.
    results = shuffle(int(genelength), n_iterations, 0, alpha, readlengths)

    # parses results from peaks script, calculates mean from peaks results
    # NOTE(review): results is presumably indexed by cutoff, holding how many
    # iterations produced that cutoff -- confirm against the shuffle docs.
    total = sum(cut * n_observed for cut, n_observed in enumerate(results))

    # Force float division: on Python 2 int / int floors, which would make
    # the rounding below a no-op and bias the mean downwards.
    cutoff = float(total) / n_iterations

    # logic for min cutoffs
    if cutoff < mincut:
        cutoff = mincut
    return int(round(cutoff, 0))
def test_small_sizes(self):
    """Smoke test: run shuffle on an edge case with only three short reads.

    No assertion is made; the result is printed for manual inspection.
    """
    # makes sure it works on edge cases
    result = shuffle(100, 3, 0, .05, [2, 3, 4])
    # Parenthesized so the line is valid on Python 3 as well as Python 2,
    # where it prints exactly what the bare print statement did.
    print(result)
def test_super_large_sizes(self):
    """Run shuffle with a very large first (length) argument, 4434885."""
    n_iterations = 5
    read_lengths = [48] * 2003
    counts = shuffle(4434885, n_iterations, 0, .05, read_lengths)
    # The returned values are expected to add up to the iteration count.
    self.assertEqual(sum(counts), n_iterations)
def test_small_sizes(self):
    """Smoke test: run shuffle on an edge case with only three short reads.

    No assertion is made; the result is printed for manual inspection.
    """
    # makes sure it works on edge cases
    result = shuffle(100, 3, 0, .05, [2, 3, 4])
    # Parenthesized so the line is valid on Python 3 as well as Python 2,
    # where it prints exactly what the bare print statement did.
    print(result)