def test_build_matrix_cooler():
    """Build a 5 kb binned matrix in .cool format and compare it to the reference.

    Runs hicBuildMatrix.main on sam_R1/sam_R2 with 4 threads, then checks
    the matrix data, the cut intervals and the QC output against the
    reference files stored under ROOT.
    """
    outfile = NamedTemporaryFile(suffix='.cool', delete=False)
    outfile.close()
    # A unique BAM path instead of a fixed /tmp/test.bam keeps this test from
    # colliding with other tests that also request a BAM output.
    bam_file = NamedTemporaryFile(suffix='.bam', delete=False)
    bam_file.close()
    qc_folder = mkdtemp(prefix="testQC_")

    try:
        args = "-s {} {} --outFileName {} -bs 5000 -b {} --QCfolder {} --threads 4".format(
            sam_R1, sam_R2, outfile.name, bam_file.name, qc_folder).split()
        hicBuildMatrix.main(args)

        test = hm.hiCMatrix(ROOT + "small_test_matrix_parallel.h5")
        new = hm.hiCMatrix(outfile.name)
        nt.assert_equal(test.matrix.data, new.matrix.data)

        # The intervals are not compared as a whole: only the first three
        # fields of each one are checked (presumably chrom/start/end --
        # TODO confirm against hiCMatrix.cut_intervals).
        nt.assert_equal(len(new.cut_intervals), len(test.cut_intervals))
        cut_interval_new_ = [x[:3] for x in new.cut_intervals]
        cut_interval_test_ = [x[:3] for x in test.cut_intervals]
        nt.assert_equal(cut_interval_new_, cut_interval_test_)

        assert are_files_equal(ROOT + "QC/QC.log", qc_folder + "/QC.log")
        assert set(os.listdir(ROOT + "QC/")) == set(os.listdir(qc_folder))
    finally:
        # Clean up even when the build or an assertion fails, so no temporary
        # output is left behind.
        for path in (outfile.name, bam_file.name):
            if os.path.exists(path):
                os.unlink(path)
        shutil.rmtree(qc_folder, ignore_errors=True)
def test_build_matrix_rf():
    """Build a restriction-fragment matrix and compare it to the reference.

    hicBuildMatrix.main is called with the dpnii_file passed via -rs, GATC
    as restriction and dangling sequence, a minimum distance of 150, a
    maximum library insert size of 1500 and 4 threads. Matrix data, cut
    intervals and the QC output are then checked against the files
    stored under ROOT.
    """
    matrix_file = NamedTemporaryFile(suffix='.h5', delete=False)
    matrix_file.close()
    qc_dir = mkdtemp(prefix="testQC_")

    command_template = (
        "-s {} {} -rs {} --outFileName {} --QCfolder {} "
        "--restrictionSequence GATC "
        "--danglingSequence GATC "
        "--minDistance 150 "
        "--maxLibraryInsertSize 1500 --threads 4"
    )
    field_values = (sam_R1, sam_R2, dpnii_file, matrix_file.name, qc_dir)
    hicBuildMatrix.main(command_template.format(*field_values).split())

    expected = hm.hiCMatrix(ROOT + "small_test_rf_matrix.h5")
    produced = hm.hiCMatrix(matrix_file.name)
    nt.assert_equal(expected.matrix.data, produced.matrix.data)
    nt.assert_equal(expected.cut_intervals, produced.cut_intervals)

    # Show the reference QC listing in the captured test output.
    print(set(os.listdir(ROOT + "QC_rc/")))
    assert are_files_equal(ROOT + "QC_rc/QC.log", qc_dir + "/QC.log")
    assert set(os.listdir(ROOT + "QC_rc/")) == set(os.listdir(qc_dir))

    # Remove the temporary outputs.
    os.unlink(matrix_file.name)
    shutil.rmtree(qc_dir)
def test_build_matrix_restrictionCutFile_eight(
        sam1, sam2, outFile, qcFolder, outBam, binSize, restrictionCutFile,
        minDistance, maxDistance, maxLibraryInsertSize, restrictionSequence,
        danglingSequence, region, removeSelfLigation, minMappingQuality,
        threads, inputBufferSize):
    """Smoke-test a restriction-cut-file build with --skipDuplicationCheck.

    The command line combines --restrictionCutFile with the restriction and
    dangling sequences, distance and insert-size limits, thread count,
    self-ligation handling, mapping quality, input buffer size, --doTestRun
    and --skipDuplicationCheck. Nothing is asserted about the result; the
    test passes when hicBuildMatrix.main returns without raising.

    The remaining parameters (outBam, binSize, maxDistance, region) are
    accepted but not used in this test.
    """
    # NOTE(review): sam1/sam2 are accepted but the module-level bam_R1/bam_R2
    # are passed to -s instead -- confirm this is intentional.
    command_template = (
        "-s {} {} --restrictionCutFile {} --outFileName {} --QCfolder {} "
        "--restrictionSequence {} "
        "--danglingSequence {} "
        "--minDistance {} "
        "--maxLibraryInsertSize {} --threads {} "
        "--removeSelfLigation {} --keepSelfCircles "
        "--minMappingQuality {} --inputBufferSize {} "
        "--doTestRun --skipDuplicationCheck "
    )
    field_values = (
        bam_R1, bam_R2, restrictionCutFile, outFile.name, qcFolder,
        restrictionSequence, danglingSequence, minDistance,
        maxLibraryInsertSize, threads, removeSelfLigation,
        minMappingQuality, inputBufferSize,
    )
    hicBuildMatrix.main(command_template.format(*field_values).split())

    # Remove the outputs handed in by the caller.
    os.unlink(outFile.name)
    shutil.rmtree(qcFolder)
def test_build_matrix(capsys):
    """Build a 5 kb binned .h5 matrix with 4 threads and compare it to the reference.

    Checks the matrix data, the cut intervals, the QC output and the size of
    the written BAM file against the reference files stored under ROOT.

    capsys: accepted but not used by the test body.
    """
    outfile = NamedTemporaryFile(suffix='.h5', delete=False)
    outfile.close()
    # A unique BAM path instead of a fixed /tmp/test.bam keeps this test from
    # colliding with other tests that also request a BAM output.
    bam_file = NamedTemporaryFile(suffix='.bam', delete=False)
    bam_file.close()
    qc_folder = mkdtemp(prefix="testQC_")

    try:
        args = "-s {} {} --outFileName {} -bs 5000 -b {} --QCfolder {} --threads 4".format(
            sam_R1, sam_R2, outfile.name, bam_file.name, qc_folder).split()
        hicBuildMatrix.main(args)

        test = hm.hiCMatrix(ROOT + "small_test_matrix_parallel.h5")
        new = hm.hiCMatrix(outfile.name)
        nt.assert_equal(test.matrix.data, new.matrix.data)
        nt.assert_equal(test.cut_intervals, new.cut_intervals)

        # Show the reference QC listing in the captured test output.
        print(set(os.listdir(ROOT + "QC/")))
        assert are_files_equal(ROOT + "QC/QC.log", qc_folder + "/QC.log")
        assert set(os.listdir(ROOT + "QC/")) == set(os.listdir(qc_folder))

        # accept delta of 60 kb, file size is around 4.5 MB
        size_delta = abs(os.path.getsize(ROOT + "small_test_matrix_result.bam") -
                         os.path.getsize(bam_file.name))
        assert size_delta < 64000
    finally:
        # Clean up even when the build or an assertion fails, so no temporary
        # output is left behind.
        for path in (outfile.name, bam_file.name):
            if os.path.exists(path):
                os.unlink(path)
        shutil.rmtree(qc_folder, ignore_errors=True)
def main(args=None):
    """Run hicBuildMatrix in test-run mode to produce the QC output.

    Parses the command line, makes sure the QC folder exists, translates the
    parsed options into a hicBuildMatrix argument list (single thread,
    bin size 10000, --doTestRun limited to args.lines lines) and passes it
    to hicBuildMatrix.main.

    args: command line tokens handed to the parser returned by
        parse_arguments(); None is passed through unchanged (presumably
        making the parser fall back to sys.argv -- confirm).

    Raises OSError if the QC folder cannot be created for a reason other
    than it already existing.
    """
    args = parse_arguments().parse_args(args)

    if not os.path.exists(args.QCfolder):
        try:
            os.makedirs(args.QCfolder)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    # Only the path of the temporary matrix is needed, so release the handle
    # right away instead of keeping it open for the whole run.
    outFile = NamedTemporaryFile(suffix='.h5', delete=False)
    outFile.close()

    args_hicBuildMatrix = "--samFiles {} {} --outFileName {} --QCfolder {} --doTestRun --doTestRunLines {} --threads 1 ".format(
        args.samFiles[0], args.samFiles[1], outFile.name, args.QCfolder,
        str(args.lines)).split()
    args_hicBuildMatrix.extend(['--binSize', str(10000)])

    if args.restrictionSequence:
        args_hicBuildMatrix.append('--restrictionSequence')
        args_hicBuildMatrix.extend(args.restrictionSequence)
    if args.danglingSequence:
        args_hicBuildMatrix.append('--danglingSequence')
        args_hicBuildMatrix.extend(args.danglingSequence)
    # This branch used to be guarded by args.danglingSequence, which dropped
    # the cut files when no dangling sequence was given and iterated a
    # missing value when only a dangling sequence was given.
    if args.restrictionCutFile:
        args_hicBuildMatrix.append('--restrictionCutFile')
        args_hicBuildMatrix.extend(
            cut_file.name for cut_file in args.restrictionCutFile)

    log.debug('args_hicBuildMatrix {}'.format(args_hicBuildMatrix))
    try:
        hicBuildMatrix.main(args_hicBuildMatrix)
    finally:
        # The matrix path is never exposed to the caller, so remove the
        # temporary file instead of leaving it behind.
        if os.path.exists(outFile.name):
            os.unlink(outFile.name)
def test_build_matrix_restrictionCutFile_four(
        sam1, sam2, outFile, qcFolder, outBam, binSize, restrictionCutFile,
        minDistance, maxDistance, maxLibraryInsertSize, restrictionSequence,
        danglingSequence, region, removeSelfLigation, minMappingQuality,
        threads, inputBufferSize):
    """Smoke-test a restriction-cut-file build with further options, no region.

    The command line combines --restrictionCutFile with the restriction and
    dangling sequences, --minDistance, --maxLibraryInsertSize, --threads,
    --removeSelfLigation and --keepSelfCircles. Nothing is asserted about
    the result; the test passes when hicBuildMatrix.main returns without
    raising.

    The remaining parameters (outBam, binSize, maxDistance, region,
    minMappingQuality, inputBufferSize) are accepted but not used here.
    """
    # NOTE(review): sam1/sam2 are accepted but the module-level bam_R1/bam_R2
    # are passed to -s instead -- confirm this is intentional.
    command_template = (
        "-s {} {} --restrictionCutFile {} --outFileName {} --QCfolder {} "
        "--restrictionSequence {} "
        "--danglingSequence {} "
        "--minDistance {} "
        "--maxLibraryInsertSize {} --threads {} "
        "--removeSelfLigation {} --keepSelfCircles "
    )
    field_values = (
        bam_R1, bam_R2, restrictionCutFile, outFile.name, qcFolder,
        restrictionSequence, danglingSequence, minDistance,
        maxLibraryInsertSize, threads, removeSelfLigation,
    )
    hicBuildMatrix.main(command_template.format(*field_values).split())

    # Remove the outputs handed in by the caller.
    os.unlink(outFile.name)
    shutil.rmtree(qcFolder)
def test_build_matrix_restrictionCutFile_one(
        sam1, sam2, outFile, qcFolder, outBam, binSize, restrictionCutFile,
        minDistance, maxDistance, maxLibraryInsertSize, restrictionSequence,
        danglingSequence, region, removeSelfLigation, minMappingQuality,
        threads, inputBufferSize):
    """Smoke-test a build that only specifies --restrictionCutFile.

    hicBuildMatrix.main receives the two input files, the restriction cut
    file, the output matrix path and the QC folder. Nothing is asserted
    about the result; the test passes when the call returns without raising.

    All other parameters are accepted but not used in this test.
    """
    command_template = (
        "-s {} {} --restrictionCutFile {} --outFileName {} "
        "--QCfolder {} "
    )
    field_values = (sam1, sam2, restrictionCutFile, outFile.name, qcFolder)
    hicBuildMatrix.main(command_template.format(*field_values).split())

    # Remove the outputs handed in by the caller.
    os.unlink(outFile.name)
    shutil.rmtree(qcFolder)
def test_build_matrix():
    """Build a 5 kb binned .h5 matrix and compare it to the reference.

    Runs hicBuildMatrix.main on sam_R1/sam_R2, then checks the matrix data,
    the cut intervals and the list of QC files against the reference files
    stored under ROOT.
    """
    outfile = NamedTemporaryFile(suffix='.h5', delete=False)
    outfile.close()
    # A unique BAM path instead of a fixed /tmp/test.bam keeps this test from
    # colliding with other tests that also request a BAM output.
    bam_file = NamedTemporaryFile(suffix='.bam', delete=False)
    bam_file.close()
    qc_folder = mkdtemp(prefix="testQC_")

    try:
        args = "-s {} {} -o {} -bs 5000 -b {} --QCfolder {}".format(
            sam_R1, sam_R2, outfile.name, bam_file.name, qc_folder).split()
        hicBuildMatrix.main(args)

        test = hm.hiCMatrix(ROOT + "small_test_matrix.h5")
        new = hm.hiCMatrix(outfile.name)
        nt.assert_equal(test.matrix.data, new.matrix.data)
        assert test.cut_intervals == new.cut_intervals

        # print() with a single argument behaves the same on Python 2 and 3;
        # the former print statement was a SyntaxError on Python 3.
        print(set(os.listdir(ROOT + "QC/")))
        assert set(os.listdir(ROOT + "QC/")) == set(os.listdir(qc_folder))
    finally:
        # Clean up even when the build or an assertion fails, so no temporary
        # output is left behind.
        for path in (outfile.name, bam_file.name):
            if os.path.exists(path):
                os.unlink(path)
        shutil.rmtree(qc_folder, ignore_errors=True)
def test_build_matrix_bin_size(
        sam1, sam2, outFile, qcFolder, outBam, binSize, restrictionCutFile,
        minDistance, maxDistance, maxLibraryInsertSize, restrictionSequence,
        danglingSequence, region, removeSelfLigation, minMappingQuality,
        threads, inputBufferSize):
    """Smoke-test a build that uses --binSize instead of a restriction cut file.

    hicBuildMatrix.main receives the two input files, the bin size, the
    output matrix path and the QC folder. Nothing is asserted about the
    result; the test passes when the call returns without raising.

    Note: the run may take some time.
    Note: the parameter values are supplied by the caller (presumably a
    pytest parametrization -- not visible here); extending that list
    allows more command line combinations to be covered.
    """
    command_template = "-s {} {} --binSize {} --outFileName {} --QCfolder {} "
    field_values = (sam1, sam2, binSize, outFile.name, qcFolder)
    hicBuildMatrix.main(command_template.format(*field_values).split())

    # Remove the outputs handed in by the caller.
    os.unlink(outFile.name)
    shutil.rmtree(qcFolder)