def test_cat_files(self):
    '''cat_files output for three input files matches the expected file'''
    input_names = (
        'test_common_cat_files.in.1',
        'test_common_cat_files.in.2',
        'test_common_cat_files.in.3',
    )
    infiles = [os.path.join(data_dir, name) for name in input_names]
    expected_file = os.path.join(data_dir, 'test_common_cat_files.out')
    got_file = 'tmp.test.common_cat_files.out'

    common.cat_files(infiles, got_file)

    # shallow=False forces a comparison of file contents, not just os.stat data
    files_match = filecmp.cmp(expected_file, got_file, shallow=False)
    self.assertTrue(files_match)
    os.unlink(got_file)
def _run(self):
    '''Run every pipeline stage from inside self.outdir.

    In order: write the versions file, map and cluster the reads, run the
    per-cluster assemblies (only when clusters exist and insert size data
    could be set), write the reports and fasta files, concatenate the
    cluster logs if self.log_files is set, clean up, and write the MLST
    reports.

    Raises Error if self.clusters_all_ran_ok is false after the assembly
    stage. The working directory in effect on entry is always restored,
    whether or not a stage raises.
    '''
    start_dir = os.getcwd()
    try:
        os.chdir(self.outdir)
        self.write_versions_file(start_dir)
        self._map_and_cluster_reads()
        # Reset here; the log-catting step below only runs if this is no
        # longer None (presumably set while running clusters - verify).
        self.log_files = None

        if len(self.cluster_to_dir) == 0:
            # Nothing mapped, so there is nothing to assemble
            if self.verbose:
                print('No reads mapped. Skipping all assemblies', flush=True)
            print('WARNING: no reads mapped to reference genes. Therefore no local assemblies will be run', file=sys.stderr)
        elif self._set_insert_size_data():
            if self.verbose:
                print('{:_^79}'.format(' Assembling each cluster '))
                print('Will run', self.threads, 'cluster(s) in parallel', flush=True)
            self._init_and_run_clusters()
            if self.verbose:
                print('Finished assembling clusters\n')
        else:
            # Clusters exist, but insert size data could not be set
            print('WARNING: not enough proper read pairs (found ' + str(self.proper_pairs) + ') to determine insert size.', file=sys.stderr)
            print('This probably means that very few reads were mapped at all. No local assemblies will be run', file=sys.stderr)
            if self.verbose:
                print('Not enough proper read pairs mapped to determine insert size. Skipping all assemblies.', flush=True)

        if not self.clusters_all_ran_ok:
            raise Error('At least one cluster failed! Stopping...')

        # Reports: the full TSV first, then the filtered report built from it
        if self.verbose:
            print('{:_^79}'.format(' Writing reports '), flush=True)
            print('Making', self.report_file_all_tsv)
        self._write_report(self.clusters, self.report_file_all_tsv)

        if self.verbose:
            print('Making', self.report_file_filtered)
        filtered_report = report_filter.ReportFilter(infile=self.report_file_all_tsv)
        filtered_report.run(self.report_file_filtered)

        # Fasta output files
        if self.verbose:
            print()
            print('{:_^79}'.format(' Writing fasta of assembled sequences '), flush=True)
            print(self.catted_assembled_seqs_fasta, 'and', self.catted_genes_matching_refs_fasta, flush=True)
        self._write_catted_assembled_seqs_fasta(self.catted_assembled_seqs_fasta)
        self._write_catted_genes_matching_refs_fasta(self.catted_genes_matching_refs_fasta)
        self._write_catted_assemblies_fasta(self.catted_assemblies_fasta)

        if self.log_files is not None:
            catted_log = os.path.join(self.outdir, 'log.clusters.gz')
            if self.verbose:
                print()
                print('{:_^79}'.format(' Catting cluster log files '), flush=True)
                print('Writing file', catted_log, flush=True)
            common.cat_files(self.log_files, catted_log)

        if self.verbose:
            print()
            print('{:_^79}'.format(' Cleaning files '), flush=True)
        self._clean()

        Clusters._write_mlst_reports(
            self.mlst_profile_file,
            self.report_file_filtered,
            self.mlst_reports_prefix,
            verbose=self.verbose,
        )

        if self.clusters_all_ran_ok and self.verbose:
            print('\nAll done!\n')
    finally:
        # Always hand the caller back its original working directory
        os.chdir(start_dir)
def _run(self):
    '''Run the pipeline inside self.outdir, then return to the original directory.

    Steps, in order:
      1. write the versions file (passed the directory we started in)
      2. map and cluster the reads
      3. if any clusters exist and insert size data could be set, run the
         clusters; otherwise print a warning to stderr and skip assemblies
      4. write the full and filtered reports
      5. write the concatenated fasta files
      6. concatenate the cluster log files, if self.log_files is not None
      7. clean up and write the MLST reports

    Raises:
        Error: if self.clusters_all_ran_ok is false after step 3.

    The work is wrapped in try/finally so that the caller's working
    directory is restored even when a step raises (including the Error
    above). Without that, a failure would leave the process in self.outdir.
    '''
    cwd = os.getcwd()
    try:
        os.chdir(self.outdir)
        self.write_versions_file(cwd)
        self._map_and_cluster_reads()
        # Step 6 is skipped unless something sets this to a non-None value
        # later on (presumably _init_and_run_clusters - verify).
        self.log_files = None

        if len(self.cluster_to_dir) > 0:
            got_insert_data_ok = self._set_insert_size_data()
            if not got_insert_data_ok:
                print('WARNING: not enough proper read pairs (found ' + str(self.proper_pairs) + ') to determine insert size.', file=sys.stderr)
                print('This probably means that very few reads were mapped at all. No local assemblies will be run', file=sys.stderr)
                if self.verbose:
                    print('Not enough proper read pairs mapped to determine insert size. Skipping all assemblies.', flush=True)
            else:
                if self.verbose:
                    print('{:_^79}'.format(' Assembling each cluster '))
                    print('Will run', self.threads, 'cluster(s) in parallel', flush=True)
                self._init_and_run_clusters()
                if self.verbose:
                    print('Finished assembling clusters\n')
        else:
            if self.verbose:
                print('No reads mapped. Skipping all assemblies', flush=True)
            print('WARNING: no reads mapped to reference genes. Therefore no local assemblies will be run', file=sys.stderr)

        if not self.clusters_all_ran_ok:
            raise Error('At least one cluster failed! Stopping...')

        if self.verbose:
            print('{:_^79}'.format(' Writing reports '), flush=True)
            print('Making', self.report_file_all_tsv)
        self._write_report(self.clusters, self.report_file_all_tsv)

        # The filtered report is derived from the full TSV written just above
        if self.verbose:
            print('Making', self.report_file_filtered)
        rf = report_filter.ReportFilter(infile=self.report_file_all_tsv)
        rf.run(self.report_file_filtered)

        if self.verbose:
            print()
            print('{:_^79}'.format(' Writing fasta of assembled sequences '), flush=True)
            print(self.catted_assembled_seqs_fasta, 'and', self.catted_genes_matching_refs_fasta, flush=True)
        self._write_catted_assembled_seqs_fasta(self.catted_assembled_seqs_fasta)
        self._write_catted_genes_matching_refs_fasta(self.catted_genes_matching_refs_fasta)
        self._write_catted_assemblies_fasta(self.catted_assemblies_fasta)

        if self.log_files is not None:
            clusters_log_file = os.path.join(self.outdir, 'log.clusters.gz')
            if self.verbose:
                print()
                print('{:_^79}'.format(' Catting cluster log files '), flush=True)
                print('Writing file', clusters_log_file, flush=True)
            common.cat_files(self.log_files, clusters_log_file)

        if self.verbose:
            print()
            print('{:_^79}'.format(' Cleaning files '), flush=True)
        self._clean()

        Clusters._write_mlst_reports(self.mlst_profile_file, self.report_file_filtered, self.mlst_reports_prefix, verbose=self.verbose)

        if self.clusters_all_ran_ok and self.verbose:
            print('\nAll done!\n')
    finally:
        # Restore the caller's working directory on success and on failure
        os.chdir(cwd)