def test_index_finder2(self):
    ''' Test finding contiguous bins.

    Builds bins from self.chrFile with the arguments (10, False) and
    checks the value returned by findBinIndex for a series of chr2
    positions, including positions for which 'nobin' is expected.
    '''
    bins = interactionMatrix.genomeBin((self.chrFile, 10, False))
    # (position on chr2, expected return value of findBinIndex)
    lookups = [
        (0, 'nobin'),
        (1, 5),
        (9, 5),
        (10, 6),
        (25, 7),
        (26, 8),
        (33, 8),
        (34, 'nobin'),
    ]
    for position, wanted in lookups:
        self.assertEqual(bins.findBinIndex('chr2', position), wanted)
def test_bin_generation6(self):
    ''' Test equal bin creation with large bins.

    Bins are built from self.chrFile with the arguments (41, True). A
    single bin is expected on chr1 and an empty bin table on chr2.
    '''
    genomeBin = interactionMatrix.genomeBin((self.chrFile, 41, True))
    columns = ['chr', 'start', 'end', 'index']
    chr1DF = pd.DataFrame(
        {'chr': ['chr1'], 'start': [3], 'end': [43], 'index': [0]},
        columns=columns)
    chr2DF = pd.DataFrame(columns=columns)
    # Iterating a DataFrame yields its column labels, so the builtin all()
    # applied to a comparison frame is true whatever the cell values are.
    # Reduce over the values with .all().all() so that a mismatching bin
    # really fails the test.
    for chrom, expected in (('chr1', chr1DF), ('chr2', chr2DF)):
        observed = genomeBin.binDict[chrom]
        self.assertTrue(
            (observed == expected).all().all(),
            'unexpected bins for %s:\n%s' % (chrom, observed))
def test_index_finder1(self):
    ''' Test finding non contiguous bins.

    Builds bins from self.inBed1 and checks the value returned by
    findBinIndex for several positions, including ones for which 'nobin'
    is expected and a chromosome for which 'nochr' is expected.
    '''
    bins = interactionMatrix.genomeBin(self.inBed1)
    # (chromosome, position, expected return value of findBinIndex)
    lookups = [
        ('chr1', 10, 0),
        ('chr1', 9, 'nobin'),
        ('chr1', 30, 1),
        ('chr1', 29, 'nobin'),
        ('chr1', 20, 0),
        ('chr1', 21, 'nobin'),
        ('chr1', 40, 1),
        ('chr1', 41, 'nobin'),
        ('chr3', 10, 'nochr'),
    ]
    for chrom, position, wanted in lookups:
        self.assertEqual(bins.findBinIndex(chrom, position), wanted)
def test_bin_generation4(self):
    ''' Test equal bin creation with small bins.

    Bins are built from self.chrFile with the arguments (10, True). Four
    bins are expected on chr1 and three on chr2, with the 'index' column
    running on from chr1 into chr2.
    '''
    genomeBin = interactionMatrix.genomeBin((self.chrFile, 10, True))
    columns = ['chr', 'start', 'end', 'index']
    chr1DF = pd.DataFrame(
        {
            'chr': ['chr1'] * 4,
            'start': [4, 14, 24, 34],
            'end': [13, 23, 33, 43],
            'index': [0, 1, 2, 3],
        },
        columns=columns)
    chr2DF = pd.DataFrame(
        {
            'chr': ['chr2'] * 3,
            'start': [2, 12, 22],
            'end': [11, 21, 31],
            'index': [4, 5, 6],
        },
        columns=columns)
    # Iterating a DataFrame yields its column labels, so the builtin all()
    # applied to a comparison frame is true whatever the cell values are.
    # Reduce over the values with .all().all() so that a mismatching bin
    # really fails the test.
    for chrom, expected in (('chr1', chr1DF), ('chr2', chr2DF)):
        observed = genomeBin.binDict[chrom]
        self.assertTrue(
            (observed == expected).all().all(),
            'unexpected bins for %s:\n%s' % (chrom, observed))
def test_bin_generation3(self):
    ''' Test unequal bin creation with large bins.

    Bins are built from self.chrFile with the arguments (40, False). Two
    bins are expected on chr1 and a single bin on chr2.
    '''
    genomeBin = interactionMatrix.genomeBin((self.chrFile, 40, False))
    columns = ['chr', 'start', 'end', 'index']
    chr1DF = pd.DataFrame(
        {
            'chr': ['chr1'] * 2,
            'start': [1, 24],
            'end': [23, 46],
            'index': [0, 1],
        },
        columns=columns)
    chr2DF = pd.DataFrame(
        {'chr': ['chr2'], 'start': [1], 'end': [33], 'index': [2]},
        columns=columns)
    # Iterating a DataFrame yields its column labels, so the builtin all()
    # applied to a comparison frame is true whatever the cell values are.
    # Reduce over the values with .all().all() so that a mismatching bin
    # really fails the test.
    for chrom, expected in (('chr1', chr1DF), ('chr2', chr2DF)):
        observed = genomeBin.binDict[chrom]
        self.assertTrue(
            (observed == expected).all().all(),
            'unexpected bins for %s:\n%s' % (chrom, observed))
def test_bin_generation2(self):
    ''' Test unequal bin creation with small bins.

    Bins are built from self.chrFile with the arguments (11, False). Five
    bins are expected on chr1 and three on chr2, with the 'index' column
    running on from chr1 into chr2.
    '''
    genomeBin = interactionMatrix.genomeBin((self.chrFile, 11, False))
    columns = ['chr', 'start', 'end', 'index']
    chr1DF = pd.DataFrame(
        {
            'chr': ['chr1'] * 5,
            'start': [1, 11, 20, 29, 38],
            'end': [10, 19, 28, 37, 46],
            'index': [0, 1, 2, 3, 4],
        },
        columns=columns)
    chr2DF = pd.DataFrame(
        {
            'chr': ['chr2'] * 3,
            'start': [1, 12, 23],
            'end': [11, 22, 33],
            'index': [5, 6, 7],
        },
        columns=columns)
    # Iterating a DataFrame yields its column labels, so the builtin all()
    # applied to a comparison frame is true whatever the cell values are.
    # Reduce over the values with .all().all() so that a mismatching bin
    # really fails the test.
    for chrom, expected in (('chr1', chr1DF), ('chr2', chr2DF)):
        observed = genomeBin.binDict[chrom]
        self.assertTrue(
            (observed == expected).all().all(),
            'unexpected bins for %s:\n%s' % (chrom, observed))
def test_bin_generation7(self):
    ''' Test bin creation from bed file.

    Bins are built from self.inBed1. Two bins are expected on chr1 and a
    single bin on chr2.
    '''
    genomeBin = interactionMatrix.genomeBin(self.inBed1)
    columns = ['chr', 'start', 'end', 'index']
    chr1DF = pd.DataFrame(
        {
            'chr': ['chr1'] * 2,
            'start': [10, 30],
            'end': [20, 40],
            'index': [0, 1],
        },
        columns=columns)
    chr2DF = pd.DataFrame(
        {'chr': ['chr2'], 'start': [5], 'end': [15], 'index': [2]},
        columns=columns)
    # Iterating a DataFrame yields its column labels, so the builtin all()
    # applied to a comparison frame is true whatever the cell values are.
    # Reduce over the values with .all().all() so that a mismatching bin
    # really fails the test.
    for chrom, expected in (('chr1', chr1DF), ('chr2', chr2DF)):
        observed = genomeBin.binDict[chrom]
        self.assertTrue(
            (observed == expected).all().all(),
            'unexpected bins for %s:\n%s' % (chrom, observed))
def test_matrix_generation(self):
    ''' Test creation of matrix.

    Builds bins from self.chrFile with the arguments (10, True), runs
    generateMatrix on self.inFrag with four threads and compares both
    returned arrays against hard-coded expectations.
    '''
    expectedCounts = np.array([
        [2, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 2, 0, 0],
        [0, 1, 0, 0, 0, 0, 0],
        [0, 0, 2, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0],
    ])
    expectedLog = np.array([10, 2, 3, 5])
    bins = interactionMatrix.genomeBin((self.chrFile, 10, True))
    countMatrix, logArray = interactionMatrix.generateMatrix(
        self.inFrag, bins, threads=4)
    countsMatch = np.array_equal(countMatrix, expectedCounts)
    self.assertTrue(countsMatch)
    logMatches = np.array_equal(logArray, expectedLog)
    self.assertTrue(logMatches)
# Extract and print parameters to create bins. Bins come either from a
# user-supplied bed file or from a (chromosome file, bin size, equal flag)
# tuple.
if args['bed']:
    binData = args['<bedfile>']
    print('Parameters:\n\t%s\n\t%s' % (
        'bed file provided',
        'minimum bin count: %s' % (args['<mincount>'])
    ))
else:
    binData = (args['<chrfile>'], args['<binsize>'], args['--equal'])
    print('Parameters:\n %s\n %s\n %s' % (
        'max bin size: %s' % (args['<binsize>']),
        'bin size equal: %s' % (args['--equal']),
        'minimum bin count: %s' % (args['<mincount>'])
    ))
# Create bin object
genomeBins = interactionMatrix.genomeBin(binData)
# Sequentially process input files
failedBins = np.full(genomeBins.binCount, False, dtype=bool)
matrixFileList = []
for f in args['<inputfiles>']:
    # Extract sample name: the file's base name with the
    # '.fragLigations.gz' suffix removed. Fail with an explicit message
    # instead of an AttributeError on None when the suffix is missing.
    sampleMatch = re.search(r'([^/]*)\.fragLigations\.gz$', f)
    if sampleMatch is None:
        raise ValueError(
            "input file '%s' does not end with '.fragLigations.gz'" % (f))
    sampleName = sampleMatch.group(1)
    # Create output file prefix. NOTE: '<outdir>' is concatenated as-is,
    # so it has to end with a path separator to act as a directory.
    if args['--label']:
        outPrefix = args['<outdir>'] + sampleName + '_' + args['--label']
    else:
        outPrefix = args['<outdir>'] + sampleName
    # Create output file names
    bedFile = outPrefix + '.bed'
    matrixFile = outPrefix + '.countMatrix.gz'
    biasFile = outPrefix + '.bias'
def test_bin_generation8(self):
    ''' Test overlapping bins in bed file.

    Constructing a genomeBin from self.inBed2 must raise IOError.
    '''
    self.assertRaises(IOError, interactionMatrix.genomeBin, self.inBed2)