def run(self):
    '''Run the pacbio_smrtanalysis RS_Resequencing protocol against self.input_file.

    The method changes into self.working_directory and builds a command of
    the form

        pacbio_smrtanalysis --memory 6 --reference /path/to/reference.fa RS_Resequencing Outputdir *.bax.h5

    The command is executed with fastaqutils.syscall unless self.input_file
    has size zero, in which case nothing is executed. In both cases a message
    is passed to self._produce_summary: the command line that was run, or
    "File empty: <input_file>".

    The working directory of the caller is always restored before returning,
    including when the size check, the system call or the summary step raises.
    Exceptions raised by any of those steps propagate unchanged.
    '''
    original_dir = os.getcwd()
    os.chdir(self.working_directory)
    try:
        no_bsub_option = "--no_bsub" if self.no_bsub else ""

        # The command stays a single shell string: the trailing *.bax.h5
        # pattern is left for the shell to expand.
        command = " ".join([
            self.pacbio_exec,
            "--memory 6",
            no_bsub_option,
            "--reference",
            self.input_file,
            "RS_Resequencing",
            self.output_directory,
            self.read_data + "/*.bax.h5",
        ])

        # A zero-byte reference is reported in the summary instead of being run.
        if os.path.getsize(self.input_file):
            fastaqutils.syscall(command)
            self._produce_summary(command)
        else:
            self._produce_summary("File empty: " + self.input_file)

        # NOTE: waiting for the bsub quiver job to finish, moving its result
        # file and cleaning up the quiver directory were deliberately
        # abandoned here. When run in the pipeline that step can be carried
        # out as a separate task; when run on the command line the user has
        # to live with the quiver output (consensus.fasta).
    finally:
        # Undo the chdir even on failure so the caller's cwd is never left
        # pointing at the working directory.
        os.chdir(original_dir)
def test_system_call(self):
    '''Check utils.syscall and utils.syscall_get_stdout: valid commands run, a bogus command raises utils.Error'''
    expected_file = os.path.join(data_dir, 'utils_test_system_call.txt')
    scratch_file = 'utils_test_syscall.tmp'
    bogus_command = 'thisisveryunlikelytoebarealcommandandshouldthrowerror'

    # Copying the reference file through the shell must give an identical file
    copy_command = 'cat ' + expected_file + ' > ' + scratch_file
    utils.syscall(copy_command)
    self.assertTrue(filecmp.cmp(scratch_file, expected_file))
    os.unlink(scratch_file)

    # A command that does not exist must raise utils.Error
    self.assertRaises(utils.Error, utils.syscall, bogus_command)

    # Different content written to the scratch file must not compare equal
    wrong_content_command = 'echo "this is not the right string" > ' + scratch_file
    utils.syscall(wrong_content_command)
    self.assertFalse(filecmp.cmp(scratch_file, expected_file))
    os.unlink(scratch_file)

    # The stdout-capturing variant returns the output as a list of lines
    captured = utils.syscall_get_stdout('echo bingo')
    self.assertListEqual(["bingo"], captured)

    # ... and raises utils.Error on the same bogus command
    self.assertRaises(utils.Error, utils.syscall_get_stdout, bogus_command)
def test_system_call(self):
    '''Exercise the utils system-call helpers: good commands succeed and a nonexistent command raises utils.Error'''
    reference = os.path.join(data_dir, 'utils_test_system_call.txt')
    tmp_path = 'utils_test_syscall.tmp'
    not_a_command = 'thisisveryunlikelytoebarealcommandandshouldthrowerror'

    # Round-trip the reference file through "cat" and compare with the source
    utils.syscall(' '.join(['cat', reference, '>', tmp_path]))
    same_after_copy = filecmp.cmp(tmp_path, reference)
    self.assertTrue(same_after_copy)
    os.unlink(tmp_path)

    # Nonexistent command: syscall has to raise
    with self.assertRaises(utils.Error):
        utils.syscall(not_a_command)

    # Write unrelated text and make sure the comparison now fails
    utils.syscall('echo "this is not the right string" > ' + tmp_path)
    same_after_echo = filecmp.cmp(tmp_path, reference)
    self.assertFalse(same_after_echo)
    os.unlink(tmp_path)

    # Captured stdout is compared against the expected list
    stdout_lines = utils.syscall_get_stdout('echo bingo')
    self.assertListEqual(["bingo"], stdout_lines)

    # Nonexistent command: syscall_get_stdout has to raise too
    with self.assertRaises(utils.Error):
        utils.syscall_get_stdout(not_a_command)
help='Name of reference fasta file', metavar='reference.fa') parser.add_argument('outprefix', help='Prefix of output files') options = parser.parse_args() ref_seqs = {} tasks.file_to_dict(options.ref_fa, ref_seqs) nucmer_out_prefix = options.outprefix + '.nucmer' nucmer_out_delta = nucmer_out_prefix + '.delta' nucmer_out_filter = nucmer_out_prefix + '.delta-filter' nucmer_out_coords = nucmer_out_filter + '.coords' # run nucmer of contigs vs ref utils.syscall(' '.join([ 'nucmer', options.nucmer_options, '-p', nucmer_out_prefix, options.ref_fa, options.contigs_fa ])) utils.syscall(' '.join([ 'delta-filter', '-i 98 -l 180 -q', nucmer_out_delta, '>', nucmer_out_filter ])) utils.syscall(' '.join( ['show-coords', '-dTlro', nucmer_out_filter, '>', nucmer_out_coords])) # load hits into hash. key=ref_name, value=another hash with key=qry_name, value=list of hit positions in that ref seq nucmer_hits = {} contigs_to_print = {} nucmer_reader = nucmer_file_reader(nucmer_out_coords) for hit in nucmer_reader: if hit.ref_name not in nucmer_hits:
# Record the link between the two reads. Both forward: sam2 -> sam1.
# Both reverse: sam1 -> sam2. Mixed strands: an edge in each direction.
if not (sam1.is_reverse or sam2.is_reverse):
    nodes[sam2.qname].add(sam1.qname)
elif sam1.is_reverse and sam2.is_reverse:
    nodes[sam1.qname].add(sam2.qname)
else:
    nodes[sam1.qname].add(sam2.qname)
    nodes[sam2.qname].add(sam1.qname)

# make pdf of the graph using graphviz:
# one ';'-separated statement group per node, in sorted node order
chunks = []
for node, targets in sorted(nodes.items()):
    if targets:
        # a node with outgoing edges contributes one "node->target" per edge
        chunks.append(';'.join(node + '->' + target for target in targets))
    else:
        # a node without outgoing edges is listed on its own
        chunks.append(node)

cmd = 'echo "digraph G {' + ';'.join(chunks) + '}" | dot -Tpdf > ' + options.outprefix + '.pdf'

# The pipeline is saved as a shell script next to the output, then run with bash
make_graph = options.outprefix + '.make_graph.sh'
f = utils.open_file_write(make_graph)
print(cmd, file=f)
utils.close(f)
utils.syscall('bash ' + make_graph)
# Command line: one optional pass-through option string for nucmer, then
# the contigs fasta, the reference fasta and the output prefix.
parser.add_argument(
    "--nucmer_options",
    default="",
    help="Options when running nucmer [%(default)s]",
)
parser.add_argument(
    "contigs_fa",
    metavar="contigs.fa",
    help="Name of contigs fasta file",
)
parser.add_argument(
    "ref_fa",
    metavar="reference.fa",
    help="Name of reference fasta file",
)
parser.add_argument(
    "outprefix",
    help="Prefix of output files",
)
options = parser.parse_args()

# ref_seqs is handed to tasks.file_to_dict together with the reference fasta
ref_seqs = {}
tasks.file_to_dict(options.ref_fa, ref_seqs)

# Every intermediate file name is derived from the output prefix
nucmer_out_prefix = options.outprefix + ".nucmer"
nucmer_out_delta = nucmer_out_prefix + ".delta"
nucmer_out_filter = nucmer_out_prefix + ".delta-filter"
nucmer_out_coords = nucmer_out_filter + ".coords"

# run nucmer of contigs vs ref, filter the delta file, then dump coordinates;
# the three commands are run strictly in this order
nucmer_cmd = [
    "nucmer",
    options.nucmer_options,
    "-p",
    nucmer_out_prefix,
    options.ref_fa,
    options.contigs_fa,
]
delta_filter_cmd = [
    "delta-filter",
    "-i 98 -l 180 -q",
    nucmer_out_delta,
    ">",
    nucmer_out_filter,
]
show_coords_cmd = [
    "show-coords",
    "-dTlro",
    nucmer_out_filter,
    ">",
    nucmer_out_coords,
]
for cmd_parts in (nucmer_cmd, delta_filter_cmd, show_coords_cmd):
    utils.syscall(" ".join(cmd_parts))

# load hits into a nested dict: nucmer_hits[ref_name][qry_name] is a list,
# created empty the first time that (ref, qry) pair is seen
nucmer_hits = {}
contigs_to_print = {}
nucmer_reader = nucmer_file_reader(nucmer_out_coords)

for hit in nucmer_reader:
    hits_for_ref = nucmer_hits.setdefault(hit.ref_name, {})
    if hit.qry_name not in hits_for_ref:
        hits_for_ref[hit.qry_name] = []
# Add the edge(s) for this pair of reads according to their strands
sam1_reversed = sam1.is_reverse
sam2_reversed = sam2.is_reverse
if (not sam1_reversed) and (not sam2_reversed):
    # both forward: edge from sam2 to sam1
    nodes[sam2.qname].add(sam1.qname)
elif sam1_reversed and sam2_reversed:
    # both reverse: edge from sam1 to sam2
    nodes[sam1.qname].add(sam2.qname)
else:
    # opposite strands: edge in both directions
    nodes[sam1.qname].add(sam2.qname)
    nodes[sam2.qname].add(sam1.qname)

# make pdf of the graph using graphviz
# Each node gives either its "node->target" edges or, if it has none, its bare name
statements = [
    ";".join([node + "->" + x for x in l]) if len(l) else node
    for node, l in sorted(nodes.items())
]
cmd = "".join([
    'echo "digraph G {',
    ";".join(statements),
    '}" | dot -Tpdf > ',
    options.outprefix,
    ".pdf",
])

# Write the one-line pipeline to <outprefix>.make_graph.sh and execute it
make_graph = options.outprefix + ".make_graph.sh"
f = utils.open_file_write(make_graph)
print(cmd, file=f)
utils.close(f)
utils.syscall("bash " + make_graph)