Esempio n. 1
0
 def test_cat_files(self):
     '''test cat_files: output of catting three inputs matches expected file'''
     infiles = [
         os.path.join(data_dir, 'test_common_cat_files.in.1'),
         os.path.join(data_dir, 'test_common_cat_files.in.2'),
         os.path.join(data_dir, 'test_common_cat_files.in.3'),
     ]
     tmp_out = 'tmp.test.common_cat_files.out'
     expected = os.path.join(data_dir, 'test_common_cat_files.out')
     try:
         common.cat_files(infiles, tmp_out)
         # shallow=False forces a byte-by-byte comparison of the contents.
         self.assertTrue(filecmp.cmp(expected, tmp_out, shallow=False))
     finally:
         # Remove the scratch file even when cat_files or the assertion
         # fails, so a failing run does not leave it in the working dir.
         # The existence check avoids masking the real error with a
         # FileNotFoundError when the file was never created.
         if os.path.exists(tmp_out):
             os.unlink(tmp_out)
Esempio n. 2
0
 def test_cat_files(self):
     '''test cat_files: output of catting three inputs matches expected file'''
     infiles = [
         os.path.join(data_dir, 'test_common_cat_files.in.1'),
         os.path.join(data_dir, 'test_common_cat_files.in.2'),
         os.path.join(data_dir, 'test_common_cat_files.in.3'),
     ]
     tmp_out = 'tmp.test.common_cat_files.out'
     expected = os.path.join(data_dir, 'test_common_cat_files.out')
     try:
         common.cat_files(infiles, tmp_out)
         # shallow=False forces a byte-by-byte comparison of the contents.
         self.assertTrue(filecmp.cmp(expected, tmp_out, shallow=False))
     finally:
         # Remove the scratch file even when cat_files or the assertion
         # fails, so a failing run does not leave it in the working dir.
         # The existence check avoids masking the real error with a
         # FileNotFoundError when the file was never created.
         if os.path.exists(tmp_out):
             os.unlink(tmp_out)
Esempio n. 3
0
    def _run(self):
        '''Run every pipeline stage from inside self.outdir.

        Stages, in order: write the versions file, map and cluster the
        reads, run the per-cluster assemblies (only when at least one
        cluster exists and _set_insert_size_data() succeeds), write the
        full and filtered reports, write the catted fasta files, cat the
        cluster log files when self.log_files is set, clean up, and write
        the MLST reports.

        The working directory that was current on entry is restored on
        exit, whether or not a stage raised.

        Raises:
            Error: if self.clusters_all_ran_ok is false after the
                assembly stage.
        '''
        start_dir = os.getcwd()
        # Section headings are centred in a 79-column line of underscores.
        banner = '{:_^79}'.format
        try:
            os.chdir(self.outdir)
            self.write_versions_file(start_dir)
            self._map_and_cluster_reads()
            self.log_files = None

            if len(self.cluster_to_dir) == 0:
                if self.verbose:
                    print('No reads mapped. Skipping all assemblies', flush=True)
                print('WARNING: no reads mapped to reference genes. Therefore no local assemblies will be run', file=sys.stderr)
            elif self._set_insert_size_data():
                if self.verbose:
                    print(banner(' Assembling each cluster '))
                    print('Will run', self.threads, 'cluster(s) in parallel', flush=True)
                self._init_and_run_clusters()
                if self.verbose:
                    print('Finished assembling clusters\n')
            else:
                # Insert size could not be determined: warn and skip assembly.
                pairs_warning = 'WARNING: not enough proper read pairs (found ' + str(self.proper_pairs) + ') to determine insert size.'
                print(pairs_warning, file=sys.stderr)
                print('This probably means that very few reads were mapped at all. No local assemblies will be run', file=sys.stderr)
                if self.verbose:
                    print('Not enough proper read pairs mapped to determine insert size. Skipping all assemblies.', flush=True)

            if not self.clusters_all_ran_ok:
                raise Error('At least one cluster failed! Stopping...')

            # Reports: full TSV first, then the filtered version built from it.
            if self.verbose:
                print(banner(' Writing reports '), flush=True)
                print('Making', self.report_file_all_tsv)
            self._write_report(self.clusters, self.report_file_all_tsv)

            if self.verbose:
                print('Making', self.report_file_filtered)
            report_filterer = report_filter.ReportFilter(infile=self.report_file_all_tsv)
            report_filterer.run(self.report_file_filtered)

            # Catted fasta outputs.
            if self.verbose:
                print()
                print(banner(' Writing fasta of assembled sequences '), flush=True)
                print(self.catted_assembled_seqs_fasta, 'and', self.catted_genes_matching_refs_fasta, flush=True)
            self._write_catted_assembled_seqs_fasta(self.catted_assembled_seqs_fasta)
            self._write_catted_genes_matching_refs_fasta(self.catted_genes_matching_refs_fasta)
            self._write_catted_assemblies_fasta(self.catted_assemblies_fasta)

            # Cluster logs are only catted when some were recorded.
            if self.log_files is not None:
                clusters_log_file = os.path.join(self.outdir, 'log.clusters.gz')
                if self.verbose:
                    print()
                    print(banner(' Catting cluster log files '), flush=True)
                    print('Writing file', clusters_log_file, flush=True)
                common.cat_files(self.log_files, clusters_log_file)

            if self.verbose:
                print()
                print(banner(' Cleaning files '), flush=True)
            self._clean()

            Clusters._write_mlst_reports(
                self.mlst_profile_file,
                self.report_file_filtered,
                self.mlst_reports_prefix,
                verbose=self.verbose,
            )

            if self.clusters_all_ran_ok and self.verbose:
                print('\nAll done!\n')
        finally:
            os.chdir(start_dir)
Esempio n. 4
0
    def _run(self):
        '''Run every pipeline stage from inside self.outdir.

        Stages, in order: write the versions file, map and cluster the
        reads, run the per-cluster assemblies (only when at least one
        cluster exists and _set_insert_size_data() succeeds), write the
        full and filtered reports, write the catted fasta files, cat the
        cluster log files when self.log_files is set, clean up, and write
        the MLST reports.

        The working directory that was current on entry is always
        restored, including when a stage raises.

        Raises:
            Error: if self.clusters_all_ran_ok is false after the
                assembly stage.
        '''
        cwd = os.getcwd()
        # try/finally guarantees we chdir back even when a stage raises
        # (e.g. the Error below); otherwise the caller would be left
        # stranded inside self.outdir.
        try:
            os.chdir(self.outdir)
            self.write_versions_file(cwd)
            self._map_and_cluster_reads()
            self.log_files = None

            if len(self.cluster_to_dir) > 0:
                got_insert_data_ok = self._set_insert_size_data()
                if not got_insert_data_ok:
                    print('WARNING: not enough proper read pairs (found ' + str(self.proper_pairs) + ') to determine insert size.', file=sys.stderr)
                    print('This probably means that very few reads were mapped at all. No local assemblies will be run', file=sys.stderr)
                    if self.verbose:
                        print('Not enough proper read pairs mapped to determine insert size. Skipping all assemblies.', flush=True)
                else:
                    if self.verbose:
                        print('{:_^79}'.format(' Assembling each cluster '))
                        print('Will run', self.threads, 'cluster(s) in parallel', flush=True)
                    self._init_and_run_clusters()
                    if self.verbose:
                        print('Finished assembling clusters\n')
            else:
                if self.verbose:
                    print('No reads mapped. Skipping all assemblies', flush=True)
                print('WARNING: no reads mapped to reference genes. Therefore no local assemblies will be run', file=sys.stderr)

            if not self.clusters_all_ran_ok:
                raise Error('At least one cluster failed! Stopping...')

            # Reports: full TSV first, then the filtered version built from it.
            if self.verbose:
                print('{:_^79}'.format(' Writing reports '), flush=True)
                print('Making', self.report_file_all_tsv)
            self._write_report(self.clusters, self.report_file_all_tsv)

            if self.verbose:
                print('Making', self.report_file_filtered)
            rf = report_filter.ReportFilter(infile=self.report_file_all_tsv)
            rf.run(self.report_file_filtered)

            if self.verbose:
                print()
                print('{:_^79}'.format(' Writing fasta of assembled sequences '), flush=True)
                print(self.catted_assembled_seqs_fasta, 'and', self.catted_genes_matching_refs_fasta, flush=True)
            self._write_catted_assembled_seqs_fasta(self.catted_assembled_seqs_fasta)
            self._write_catted_genes_matching_refs_fasta(self.catted_genes_matching_refs_fasta)
            self._write_catted_assemblies_fasta(self.catted_assemblies_fasta)

            # Cluster logs are only catted when some were recorded.
            if self.log_files is not None:
                clusters_log_file = os.path.join(self.outdir, 'log.clusters.gz')
                if self.verbose:
                    print()
                    print('{:_^79}'.format(' Catting cluster log files '), flush=True)
                    print('Writing file', clusters_log_file, flush=True)
                common.cat_files(self.log_files, clusters_log_file)

            if self.verbose:
                print()
                print('{:_^79}'.format(' Cleaning files '), flush=True)
            self._clean()

            Clusters._write_mlst_reports(self.mlst_profile_file, self.report_file_filtered, self.mlst_reports_prefix, verbose=self.verbose)

            if self.clusters_all_ran_ok and self.verbose:
                print('\nAll done!\n')
        finally:
            os.chdir(cwd)