def _run_bax_to_bam(input_file_name, output_file_name):
    """Run the ``bax2bam`` command line tool and check the resulting dataset.

    :param input_file_name: path handed to ``bax2bam --xml``.  When it ends
        with ``.bax.h5`` it is first wrapped in a temporary HdfSubreadSet XML
        file, which is what gets passed to the tool instead.
    :param output_file_name: path passed as ``--output-xml``.  The ``-o``
        prefix is this name with its last two dot-separated components
        removed.
    :return: the non-zero exit code of the command if it failed, otherwise 0
        after ``assertIndexed()`` has been called on the output SubreadSet.
    """
    # Function-scope import: only needed for removing the scratch file.
    import os

    base_name = ".".join(output_file_name.split(".")[:-2])
    xml_input = input_file_name
    scratch_xml = None
    # XXX bax2bam won't write an hdfsubreadset unless the input is XML too
    if input_file_name.endswith(".bax.h5"):
        # delete=False keeps the path reserved on disk after the handle is
        # closed.  Taking ``.name`` from a throw-away NamedTemporaryFile only
        # works when the object is finalized immediately; if finalization is
        # deferred, the file can be deleted after the XML was written to it.
        with tempfile.NamedTemporaryFile(suffix=".hdfsubreadset.xml",
                                         delete=False) as handle:
            scratch_xml = handle.name
        xml_input = scratch_xml
    try:
        if scratch_xml is not None:
            HdfSubreadSet(input_file_name).write(scratch_xml)
        cmd = " ".join([
            "bax2bam",
            "--subread",
            "-o", base_name,
            "--output-xml", output_file_name,
            "--xml", xml_input,
        ])
        log.info(cmd)
        result = run_cmd(cmd, stdout_fh=sys.stdout, stderr_fh=sys.stderr)
    finally:
        # Only remove the file created here, never the caller's input.
        if scratch_xml is not None:
            try:
                os.remove(scratch_xml)
            except OSError:
                # Best-effort cleanup; a leftover scratch file is harmless.
                pass
    if result.exit_code != 0:
        return result.exit_code
    with SubreadSet(output_file_name) as ds:
        ds.assertIndexed()
    return 0
def _get_bax2bam_inputs():
    """Return the list of input files for the bax2bam test class.

    This is computed in a function so that the test class can obtain its
    inputs without going through ``setUpClass`` when the test is skipped.

    When ``HAVE_DATA_AND_BAX2BAM`` is true, an HdfSubreadSet XML is built on
    the fly from two bax.h5 files, so the test covers the case where multiple
    movies are supplied as input.  Otherwise a placeholder path is returned,
    on the assumption that the test will be skipped.

    :return: a one-element list holding the path of the input file.
    """
    if not HAVE_DATA_AND_BAX2BAM:
        # Assume the test data isn't found and the test won't be run
        return ["/path/to/this-test-should-be-skipped.txt"]

    # delete=False keeps the file on disk once the handle is closed.  Taking
    # ``.name`` from a throw-away NamedTemporaryFile leaves the path
    # unreserved, and if the object is finalized late the dataset written
    # below would be deleted after this function has returned its path.
    with tempfile.NamedTemporaryFile(suffix=".hdfsubreadset.xml",
                                     delete=False) as handle:
        hdf_subread_xml = handle.name
    bax_files = (
        SIV_DATA_DIR + "/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.bax.h5",
        pbcore.data.getBaxH5_v23()[0],
    )
    ds = HdfSubreadSet(*bax_files)
    ds.name = "lambda_rsii"
    # The point of this fixture is multi-movie input: both files must
    # contribute a distinct movie name.
    assert len({f.movieName for f in ds.resourceReaders()}) == 2
    ds.write(hdf_subread_xml)
    return [hdf_subread_xml]
def _run_bax_to_bam(input_file_name, output_file_name):
    """Run the ``bax2bam`` command line tool and make sure the output is indexed.

    :param input_file_name: path handed to ``bax2bam --xml``.  When it ends
        with ``.bax.h5`` it is first wrapped in a temporary HdfSubreadSet XML
        file, which is what gets passed to the tool instead.
    :param output_file_name: path passed as ``--output-xml``.  The ``-o``
        prefix is this name with its last two dot-separated components
        removed.
    :return: the non-zero exit code of the command if it failed, otherwise 0
        once the output SubreadSet has been re-written to
        ``output_file_name`` (with indices induced if it was not indexed).
    """
    # Function-scope import: only needed for removing scratch files.
    import os

    def _reserve_tmp(suffix):
        # delete=False keeps the path reserved on disk after the handle is
        # closed.  Taking ``.name`` from a throw-away NamedTemporaryFile only
        # works when the object is finalized immediately; if finalization is
        # deferred, the file can be deleted after it has been written.
        with tempfile.NamedTemporaryFile(suffix=suffix,
                                         delete=False) as handle:
            return handle.name

    def _discard(path):
        # Best-effort cleanup; a leftover scratch file is harmless.
        try:
            os.remove(path)
        except OSError:
            pass

    base_name = ".".join(output_file_name.split(".")[:-2])
    xml_input = input_file_name
    scratch_xml = None
    # XXX bax2bam won't write an hdfsubreadset unless the input is XML too
    if input_file_name.endswith(".bax.h5"):
        scratch_xml = _reserve_tmp(".hdfsubreadset.xml")
        xml_input = scratch_xml
    try:
        if scratch_xml is not None:
            HdfSubreadSet(input_file_name).write(scratch_xml)
        cmd = " ".join([
            "bax2bam",
            "--subread",
            "-o", base_name,
            "--output-xml", output_file_name,
            "--xml", xml_input,
        ])
        logging.info(cmd)
        result = run_cmd(cmd, stdout_fh=sys.stdout, stderr_fh=sys.stderr)
    finally:
        # Only remove the file created here, never the caller's input.
        if scratch_xml is not None:
            _discard(scratch_xml)
    if result.exit_code != 0:
        return result.exit_code

    # Move the XML written by bax2bam aside, then write it back to the
    # requested location after making sure the dataset is indexed.
    moved_xml = _reserve_tmp(".subreadset.xml")
    shutil.move(output_file_name, moved_xml)
    # FIXME it would be better to leave this to bax2bam
    with SubreadSet(moved_xml) as ds:
        if not ds.isIndexed:
            ds.induceIndices()
        ds.write(output_file_name)
    # Reached only when the rewrite succeeded; on failure the moved copy is
    # deliberately left behind, as it is the only remaining copy of the XML.
    _discard(moved_xml)
    return 0
def to_chunked_hdfsubreadset_files(hdfsubreadset_path, max_total_nchunks,
                                   chunk_key, dir_name, base_name, ext):
    """Split an HdfSubreadSet into chunk files and yield one chunk per file.

    The dataset at ``hdfsubreadset_path`` is opened with ``strict=True`` and
    split with ``chunks=max_total_nchunks`` and ``ignoreSubDatasets=True``.
    Each resulting piece is written to
    ``<dir_name>/<base_name>_<index>.<ext>``.

    :return: a generator of ``PipelineChunk`` objects, one per written file.
        Each has the chunk id ``<base_name>_<index>`` and a single keyword
        entry mapping ``chunk_key`` to the absolute path of its file.
    """
    parent = HdfSubreadSet(hdfsubreadset_path, strict=True)
    pieces = parent.split(chunks=max_total_nchunks, ignoreSubDatasets=True)
    for index, piece in enumerate(pieces):
        chunk_id = "_".join((base_name, str(index)))
        file_name = ".".join((chunk_id, ext))
        target = os.path.join(dir_name, file_name)
        piece.write(target)
        # chunk_key is only known at run time, so it has to be passed by
        # keyword expansion.
        datum = {chunk_key: os.path.abspath(target)}
        yield PipelineChunk(chunk_id, **datum)
def _get_bax2bam_inputs():
    """Return the list of input files for the bax2bam test class.

    This is computed in a function so that the test class can obtain its
    inputs without going through ``setUpClass`` when the test is skipped.

    When ``HAVE_DATA_AND_BAX2BAM`` is true, an HdfSubreadSet XML is built on
    the fly from two bax.h5 files, so the test covers the case where multiple
    movies are supplied as input.  Otherwise a placeholder path is returned,
    on the assumption that the test will be skipped.

    :return: a one-element list holding the path of the input file.
    """
    if not HAVE_DATA_AND_BAX2BAM:
        # Assume the test data isn't found and the test won't be run
        return ["/path/to/this-test-should-be-skipped.txt"]

    # delete=False keeps the file on disk once the handle is closed.  Taking
    # ``.name`` from a throw-away NamedTemporaryFile leaves the path
    # unreserved, and if the object is finalized late the dataset written
    # below would be deleted after this function has returned its path.
    with tempfile.NamedTemporaryFile(suffix=".hdfsubreadset.xml",
                                     delete=False) as handle:
        hdf_subread_xml = handle.name
    bax_files = (
        SIV_DATA_DIR + "/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.bax.h5",
        pbtestdata.get_file("rsii-bax-h5"),
    )
    ds = HdfSubreadSet(*bax_files)
    ds.name = "lambda_rsii"
    # The point of this fixture is multi-movie input: both files must
    # contribute a distinct movie name.
    assert len({f.movieName for f in ds.resourceReaders()}) == 2
    ds.write(hdf_subread_xml)
    return [hdf_subread_xml]