예제 #1
0
 def test_index_finder2(self):
     ''' Test finding contiguous bins.

     Bins are built from self.chrFile with a maximum bin size of 10 and
     the equal-size flag set to False; findBinIndex is then queried for
     a series of chr2 positions, including positions for which the
     'nobin' marker is expected.
     '''
     bins = interactionMatrix.genomeBin((self.chrFile, 10, False))
     # (position, expected findBinIndex result), checked in this order
     expectations = (
         (0, 'nobin'),
         (1, 5),
         (9, 5),
         (10, 6),
         (25, 7),
         (26, 8),
         (33, 8),
         (34, 'nobin'),
     )
     for position, wanted in expectations:
         self.assertEqual(bins.findBinIndex('chr2', position), wanted)
예제 #2
0
 def test_bin_generation6(self):
     ''' Test equal bin creation when the bin size only fits once.

     With a maximum bin size of 41 and the equal-size flag set to True,
     chr1 is expected to hold a single bin (3-43, index 0) and chr2 is
     expected to hold no bins at all.
     '''
     genomeBin = interactionMatrix.genomeBin((self.chrFile,41,True))
     columns = ['chr','start','end','index']
     chr1DF = pd.DataFrame({
         'chr': np.array(['chr1']),
         'start': np.array([3]),
         'end': np.array([43]),
         'index': np.array([0]),
     }, columns = columns)
     chr2DF = pd.DataFrame(columns = columns)
     # all() applied to a DataFrame iterates over its column labels,
     # which are non-empty strings and therefore always truthy. Reduce
     # the element-wise comparison explicitly so cell values are checked.
     self.assertTrue((genomeBin.binDict['chr1'] == chr1DF).values.all())
     self.assertTrue((genomeBin.binDict['chr2'] == chr2DF).values.all())
예제 #3
0
 def test_index_finder1(self):
     ''' Test finding non contiguous bins.

     Bins are built from the bed input self.inBed1; findBinIndex is then
     queried at and just outside the expected bin edges on chr1, and
     once on chr3, for which the 'nochr' marker is expected.
     '''
     bins = interactionMatrix.genomeBin(self.inBed1)
     # (chromosome, position, expected result), checked in this order
     expectations = (
         ('chr1', 10, 0),
         ('chr1', 9, 'nobin'),
         ('chr1', 30, 1),
         ('chr1', 29, 'nobin'),
         ('chr1', 20, 0),
         ('chr1', 21, 'nobin'),
         ('chr1', 40, 1),
         ('chr1', 41, 'nobin'),
         ('chr3', 10, 'nochr'),
     )
     for chrom, position, wanted in expectations:
         self.assertEqual(bins.findBinIndex(chrom, position), wanted)
예제 #4
0
 def test_bin_generation4(self):
     ''' Test equal bin creation with small bins.

     With a maximum bin size of 10 and the equal-size flag set to True,
     chr1 is expected to hold four bins (indices 0-3) and chr2 three
     bins (indices 4-6).
     '''
     genomeBin = interactionMatrix.genomeBin((self.chrFile,10,True))
     columns = ['chr','start','end','index']
     chr1DF = pd.DataFrame({
         'chr': np.array(['chr1'] * 4),
         'start': np.array([4,14,24,34]),
         'end': np.array([13,23,33,43]),
         'index': np.array([0,1,2,3]),
     }, columns = columns)
     chr2DF = pd.DataFrame({
         'chr': np.array(['chr2'] * 3),
         'start': np.array([2,12,22]),
         'end': np.array([11,21,31]),
         'index': np.array([4,5,6]),
     }, columns = columns)
     # all() applied to a DataFrame iterates over its column labels,
     # which are non-empty strings and therefore always truthy. Reduce
     # the element-wise comparison explicitly so cell values are checked.
     self.assertTrue((genomeBin.binDict['chr1'] == chr1DF).values.all())
     self.assertTrue((genomeBin.binDict['chr2'] == chr2DF).values.all())
예제 #5
0
 def test_bin_generation3(self):
     ''' Test unequal bin creation with large bins.

     With a maximum bin size of 40 and the equal-size flag set to False,
     chr1 is expected to hold two bins (indices 0-1) and chr2 a single
     bin (index 2).
     '''
     genomeBin = interactionMatrix.genomeBin((self.chrFile,40,False))
     columns = ['chr','start','end','index']
     chr1DF = pd.DataFrame({
         'chr': np.array(['chr1'] * 2),
         'start': np.array([1,24]),
         'end': np.array([23,46]),
         'index': np.array([0,1]),
     }, columns = columns)
     chr2DF = pd.DataFrame({
         'chr': np.array(['chr2']),
         'start': np.array([1]),
         'end': np.array([33]),
         'index': np.array([2]),
     }, columns = columns)
     # all() applied to a DataFrame iterates over its column labels,
     # which are non-empty strings and therefore always truthy. Reduce
     # the element-wise comparison explicitly so cell values are checked.
     self.assertTrue((genomeBin.binDict['chr1'] == chr1DF).values.all())
     self.assertTrue((genomeBin.binDict['chr2'] == chr2DF).values.all())
예제 #6
0
 def test_bin_generation2(self):
     ''' Test unequal bin creation with small bins.

     With a maximum bin size of 11 and the equal-size flag set to False,
     chr1 is expected to hold five bins (indices 0-4) and chr2 three
     bins (indices 5-7).
     '''
     genomeBin = interactionMatrix.genomeBin((self.chrFile,11,False))
     columns = ['chr','start','end','index']
     chr1DF = pd.DataFrame({
         'chr': np.array(['chr1'] * 5),
         'start': np.array([1,11,20,29,38]),
         'end': np.array([10,19,28,37,46]),
         'index': np.array([0,1,2,3,4]),
     }, columns = columns)
     chr2DF = pd.DataFrame({
         'chr': np.array(['chr2'] * 3),
         'start': np.array([1,12,23]),
         'end': np.array([11,22,33]),
         'index': np.array([5,6,7]),
     }, columns = columns)
     # all() applied to a DataFrame iterates over its column labels,
     # which are non-empty strings and therefore always truthy. Reduce
     # the element-wise comparison explicitly so cell values are checked.
     self.assertTrue((genomeBin.binDict['chr1'] == chr1DF).values.all())
     self.assertTrue((genomeBin.binDict['chr2'] == chr2DF).values.all())
예제 #7
0
 def test_bin_generation7(self):
     ''' Test bin creation from bed file.

     Bins built from self.inBed1 are expected to give two chr1 bins
     (10-20 and 30-40, indices 0-1) and one chr2 bin (5-15, index 2).
     '''
     genomeBin = interactionMatrix.genomeBin(self.inBed1)
     columns = ['chr','start','end','index']
     chr1DF = pd.DataFrame({
         'chr': np.array(['chr1'] * 2),
         'start': np.array([10,30]),
         'end': np.array([20,40]),
         'index': np.array([0,1]),
     }, columns = columns)
     chr2DF = pd.DataFrame({
         'chr': np.array(['chr2']),
         'start': np.array([5]),
         'end': np.array([15]),
         'index': np.array([2]),
     }, columns = columns)
     # all() applied to a DataFrame iterates over its column labels,
     # which are non-empty strings and therefore always truthy. Reduce
     # the element-wise comparison explicitly so cell values are checked.
     self.assertTrue((genomeBin.binDict['chr1'] == chr1DF).values.all())
     self.assertTrue((genomeBin.binDict['chr2'] == chr2DF).values.all())
예제 #8
0
 def test_matrix_generation(self):
     ''' Test creation of matrix.

     Generates the count matrix and log array for self.inFrag using
     equal-sized bins with a maximum size of 10 and four threads, then
     compares both results against the expected arrays.
     '''
     bins = interactionMatrix.genomeBin((self.chrFile, 10, True))
     observedMatrix, observedLog = interactionMatrix.generateMatrix(
         self.inFrag, bins, threads=4)
     # Expected 7 x 7 count matrix
     expectedMatrix = np.array([
         [2,0,0,0,0,0,1],
         [0,0,0,1,0,0,0],
         [0,0,0,0,2,0,0],
         [0,1,0,0,0,0,0],
         [0,0,2,0,0,0,0],
         [0,0,0,0,0,0,0],
         [1,0,0,0,0,0,0]
     ])
     expectedLog = np.array([10,2,3,5])
     self.assertTrue(np.array_equal(observedMatrix, expectedMatrix))
     self.assertTrue(np.array_equal(observedLog, expectedLog))
예제 #9
0
# Extract and print parameters to create bins.
# print is called with a single parenthesised argument so the output is
# identical under the Python 2 print statement and the print function.
if args['bed']:
    # Bins are taken from a user supplied bed file
    binData = args['<bedfile>']
    print('Parameters:\n\t%s\n\t%s' %(
        'bed file provided',
        'minimum bin count: %s' %(args['<mincount>'])
    ))
else:
    # Bins are described by (chromosome file, max bin size, equal flag)
    binData = (args['<chrfile>'], args['<binsize>'], args['--equal'])
    print('Parameters:\n  %s\n  %s\n  %s' %(
        'max bin size: %s' %(args['<binsize>']),
        'bin size equal: %s' %(args['--equal']),
        'minimum bin count: %s' %(args['<mincount>'])
    ))
# Create bin object
genomeBins = interactionMatrix.genomeBin(binData)
# Sequentially process input files
failedBins = np.full(genomeBins.binCount, False, dtype=bool)
matrixFileList = []
# Sample name is the file's base name without the '.fragLigations.gz'
# suffix. Raw string with both dots escaped so only that literal suffix
# matches; compiled once rather than on every iteration.
samplePattern = re.compile(r'([^/]*)\.fragLigations\.gz$')
for f in args['<inputfiles>']:
    # Extract sample names
    sampleMatch = samplePattern.search(f)
    if sampleMatch is None:
        # Fail with a message naming the file instead of an opaque
        # AttributeError raised by calling .group() on None
        raise ValueError(
            "input file '%s' does not end with '.fragLigations.gz'" %(f))
    sampleName = sampleMatch.group(1)
    # Create output file prefix; the output directory is prepended as
    # is, with no path separator added
    outPrefix = args['<outdir>'] + sampleName
    if args['--label']:
        outPrefix = outPrefix + '_' + args['--label']
    # Create output file names
    bedFile = outPrefix + '.bed'
    matrixFile = outPrefix + '.countMatrix.gz'
    biasFile = outPrefix + '.bias'
예제 #10
0
 def test_bin_generation8(self):
     ''' Test overlapping bins in bed file.

     Building bins from self.inBed2 is expected to raise IOError.
     '''
     self.assertRaises(IOError, interactionMatrix.genomeBin, self.inBed2)