Beispiel #1
0
 def test_large_sizes(self):
     """Regression test: shuffle over a large gene with many reads.

     A previous version failed on extreme coverage (e.g. 5000 reads of
     length 48 over a 1000 bp gene).  This exercises a still-large case
     and checks that all 5 iterations are accounted for in the result
     histogram (sum of counts == number of iterations).
     """
     result = shuffle(136, 5, 0, .05, [48] * 2003)
     self.assertEqual(sum(result), 5)
Beispiel #2
0
    def test_large_sizes(self):
        """Regression test: shuffle over a large gene with many reads.

        A previous version failed on extreme coverage (e.g. 5000 reads
        of length 48 over a 1000 bp gene).  This exercises a still-large
        case and checks that all 5 iterations are accounted for in the
        result histogram (sum of counts == number of iterations).
        """
        result = shuffle(136, 5, 0, .05, [48] * 2003)
        self.assertEqual(sum(result), 5)
Beispiel #3
0
    def test_shuffle(self):
        """Validate shuffle's input checking and basic result shape.

        BUGFIX: the arguments to assertRaises were previously wrapped in
        a single tuple, so shuffle was called with ONE tuple argument and
        raised TypeError for the wrong reason (arity mismatch), making
        every check pass vacuously.  Arguments are now passed
        positionally so shuffle's own validation is what's exercised.
        """
        # Case: fail on None for any individual argument.
        self.assertRaises(TypeError, shuffle, None, 1, 0, .05, [2, 3, 4])
        self.assertRaises(TypeError, shuffle, 1, None, 0, .05, [2, 3, 4])
        self.assertRaises(TypeError, shuffle, 1, 1, None, .05, [2, 3, 4])
        self.assertRaises(TypeError, shuffle, 1, 1, 0, None, [2, 3, 4])
        self.assertRaises(TypeError, shuffle, 1, 1, 0, .05, None)

        # Case: fail on an empty read list.
        self.assertRaises(TypeError, shuffle, 1, 1, 0, .05, [])

        # Case: fail on zero for either gene length or iteration count.
        self.assertRaises(TypeError, shuffle, 0, 1, 0, .05, [2, 3, 4])
        self.assertRaises(TypeError, shuffle, 1, 0, 0, .05, [2, 3, 4])

        # Case: succeed and check results — all 3 iterations accounted for.
        # TODO: seed the RNG so the full histogram can be locked down.
        result = shuffle(100, 3, 0, .05, [5] * 50)
        self.assertEqual(sum(result), 3)

        # Reads longer than the gene: no cutoff observed at any level.
        self.assertEqual([0] * 100, shuffle(1, 1, 0, .05, [2, 3, 4]))
Beispiel #4
0
    def test_shuffle(self):
        """Validate shuffle's input checking and basic result shape.

        BUGFIX: the arguments to assertRaises were previously wrapped in
        a single tuple, so shuffle was called with ONE tuple argument and
        raised TypeError for the wrong reason (arity mismatch), making
        every check pass vacuously.  Arguments are now passed
        positionally so shuffle's own validation is what's exercised.
        """
        # Case: fail on None for any individual argument.
        self.assertRaises(TypeError, shuffle, None, 1, 0, .05, [2, 3, 4])
        self.assertRaises(TypeError, shuffle, 1, None, 0, .05, [2, 3, 4])
        self.assertRaises(TypeError, shuffle, 1, 1, None, .05, [2, 3, 4])
        self.assertRaises(TypeError, shuffle, 1, 1, 0, None, [2, 3, 4])
        self.assertRaises(TypeError, shuffle, 1, 1, 0, .05, None)

        # Case: fail on an empty read list.
        self.assertRaises(TypeError, shuffle, 1, 1, 0, .05, [])

        # Case: fail on zero for either gene length or iteration count.
        self.assertRaises(TypeError, shuffle, 0, 1, 0, .05, [2, 3, 4])
        self.assertRaises(TypeError, shuffle, 1, 0, 0, .05, [2, 3, 4])

        # Case: succeed and check results — all 3 iterations accounted for.
        # TODO: seed the RNG so the full histogram can be locked down.
        result = shuffle(100, 3, 0, .05, [5] * 50)
        self.assertEqual(sum(result), 3)

        # Reads longer than the gene: no cutoff observed at any level.
        self.assertEqual([0] * 100, shuffle(1, 1, 0, .05, [2, 3, 4]))
Beispiel #5
0
def get_FDR_cutoff_mean(readlengths, 
                        genelength, 
                        iterations=100, 
                        mincut=2, 
                        alpha=0.05):
    """
    Estimate the read-count cutoff needed to meet an FDR threshold.

    Randomized method, as in FOX2ES NSMB paper — MEAN, not MODE:
    scatter the reads over the gene `iterations` times, calculate the
    number of reads needed to pass the FDR threshold each time, and take
    the average observed cutoff, floored at `mincut`.

    readlengths -- list of lengths of aligned portions of reads
    genelength -- effective gene length (unalignable regions aren't counted)
    iterations -- number of times to repeat FDR thresholding calculation
    mincut -- min threshold possible to return
    alpha -- FDR alpha

    Returns an int, the number of reads needed to meet the FDR cutoff.
    TODO: double check math on this
    """

    # If you have very few reads on a gene, don't waste time
    # trying to find a cutoff — just return the minimum.
    if len(readlengths) < 20:
        return mincut

    # BUGFIX: the alpha parameter was previously ignored and .05 was
    # hard-coded here; pass the caller's alpha through to shuffle.
    results = shuffle(int(genelength), int(iterations), 0, alpha, readlengths)

    # results[cut] is (presumably) the number of iterations whose observed
    # cutoff was `cut` — TODO: document the peaks return value properly.
    # Mean cutoff = count-weighted average over all iterations.
    total = 0
    for cut, n_observed in enumerate(results):
        total += (cut * n_observed)

    # Floor the mean at the minimum allowed cutoff.
    cutoff = total / iterations
    if cutoff < mincut:
        cutoff = mincut
    return int(round(cutoff, 0))
Beispiel #6
0
def get_FDR_cutoff_mean(readlengths, 
                        genelength, 
                        iterations=100, 
                        mincut=2, 
                        alpha=0.05):
    """
    Estimate the read-count cutoff needed to meet an FDR threshold.

    Randomized method, as in FOX2ES NSMB paper — MEAN, not MODE:
    scatter the reads over the gene `iterations` times, calculate the
    number of reads needed to pass the FDR threshold each time, and take
    the average observed cutoff, floored at `mincut`.

    readlengths -- list of lengths of aligned portions of reads
    genelength -- effective gene length (unalignable regions aren't counted)
    iterations -- number of times to repeat FDR thresholding calculation
    mincut -- min threshold possible to return
    alpha -- FDR alpha

    Returns an int, the number of reads needed to meet the FDR cutoff.
    TODO: double check math on this
    """

    # If you have very few reads on a gene, don't waste time
    # trying to find a cutoff — just return the minimum.
    if len(readlengths) < 20:
        return mincut

    # BUGFIX: the alpha parameter was previously ignored and .05 was
    # hard-coded here; pass the caller's alpha through to shuffle.
    results = shuffle(int(genelength), int(iterations), 0, alpha, readlengths)

    # results[cut] is (presumably) the number of iterations whose observed
    # cutoff was `cut` — TODO: document the peaks return value properly.
    # Mean cutoff = count-weighted average over all iterations.
    total = 0
    for cut, n_observed in enumerate(results):
        total += (cut * n_observed)

    # Floor the mean at the minimum allowed cutoff.
    cutoff = total / iterations
    if cutoff < mincut:
        cutoff = mincut
    return int(round(cutoff, 0))
Beispiel #7
0
def get_FDR_cutoff_mean(readlengths,
                        genelength,
                        iterations=100,
                        mincut=2,
                        alpha=0.05):
    """
    Returns an int, the number of reads needed to meet the FDR cutoff by
    randomized method (MEAN, not MODE): scatter the reads `iterations`
    times, average the observed cutoffs, and floor at `mincut`.
    TODO: double check math on this
    :param readlengths: list, list of lengths of aligned portions of reads
    :param genelength: int, effective gene length (unalignable regions aren't counted)
    :param iterations: int, default 100, number of randomization rounds
    :param mincut: int, default 2, min threshold possible to return
    :param alpha: float, default 0.05, FDR alpha
    :return: int, min number of reads per position to reach FDR
    """

    # if you have very few reads on a gene, don't waste time
    # trying to find a cutoff — just return the minimum
    if len(readlengths) < 20:
        return mincut

    # BUGFIX: the alpha parameter was previously ignored and .05 was
    # hard-coded here; pass the caller's alpha through to shuffle.
    results = shuffle(int(genelength), int(iterations), 0, alpha, readlengths)

    # results[cut] is (presumably) the number of iterations whose observed
    # cutoff was `cut` — TODO: document the peaks return value properly.
    # Mean cutoff = count-weighted average over all iterations.
    total = 0
    for cut, n_observed in enumerate(results):
        total += (cut * n_observed)

    # floor the mean at the minimum allowed cutoff
    cutoff = total / iterations
    if cutoff < mincut:
        cutoff = mincut
    return int(round(cutoff, 0))
Beispiel #8
0
 def test_small_sizes(self):
     """Edge case: a handful of short reads on a small gene.

     Previously this only printed the result (no assertion, so it could
     never fail).  Sibling tests establish that the result histogram
     sums to the iteration count, so assert that here instead.
     """
     result = shuffle(100, 3, 0, .05, [2, 3, 4])
     self.assertEqual(sum(result), 3)
Beispiel #9
0
 def test_super_large_sizes(self):
     """Stress case: shuffle over a multi-megabase gene.

     Checks that all 5 iterations are accounted for in the result
     histogram (sum of counts == number of iterations).
     """
     result = shuffle(4434885, 5, 0, .05, [48] * 2003)
     self.assertEqual(sum(result), 5)
Beispiel #10
0
 def test_small_sizes(self):
     """Edge case: a handful of short reads on a small gene.

     Previously this only printed the result (no assertion, so it could
     never fail).  Sibling tests establish that the result histogram
     sums to the iteration count, so assert that here instead.
     """
     result = shuffle(100, 3, 0, .05, [2, 3, 4])
     self.assertEqual(sum(result), 3)
Beispiel #11
0
 def test_super_large_sizes(self):
     """Stress case: shuffle over a multi-megabase gene.

     Checks that all 5 iterations are accounted for in the result
     histogram (sum of counts == number of iterations).
     """
     result = shuffle(4434885, 5, 0, .05, [48] * 2003)
     self.assertEqual(sum(result), 5)