Example #1
0
    def merge_split_summaries(self):
        """Merge the per-run split summaries into one summary file per split.

        For every name in ``self.split_names``, the summary object each run
        stored for that split is read, and the per-sample entries are combined
        into one dict keyed by sample id. The combined dict is written into a
        ``SUMMARY`` directory under ``self.output_directory`` (the directory is
        generated with ``delete_if_exists=True``). Finally, an index mapping
        split names to the merged files is serialized to ``SUMMARY.cp`` after
        being passed through ``dictio.strip_prefix_from_dict_values`` with
        ``self.output_directory`` as the prefix.

        Each merged file reuses the base name of the split's summary file as
        listed in the index of the last run iterated.

        Returns:
            A ``(summary_dir, merged_summary_index_path)`` tuple.
        """
        merged_summary_index = {}
        merged_summary_index_path = os.path.join(self.output_directory, 'SUMMARY.cp')
        summary_dir = filesnpaths.gen_output_directory(os.path.join(self.output_directory, 'SUMMARY'),
                                                       delete_if_exists=True)

        runinfos = list(self.input_runinfo_dicts.values())

        # read all index files per run into a dict here, so the access is easier
        # from within the for loop below
        run_sum_indices = {
            runinfo['sample_id']: dictio.read_serialized_object(runinfo['profile_summary_index'])
            for runinfo in runinfos
        }

        num_splits = len(self.split_names)
        for split_number, split_name in enumerate(self.split_names, start=1):
            self.progress.update('merging summaries for splits %s of %s' % (split_number, num_splits))

            merged_summary = {}
            # path of this split's summary as recorded in a run's index; after
            # the loop it holds the entry of the last run, which names the
            # merged output file.
            split_summary_rel_path = None
            for runinfo in runinfos:
                sample_id = runinfo['sample_id']
                split_summary_rel_path = run_sum_indices[sample_id][split_name]
                run_split_summary = dictio.read_serialized_object(
                    os.path.join(runinfo['input_dir'], split_summary_rel_path))
                merged_summary[sample_id] = run_split_summary[sample_id]

            # NOTE(review): with no input runs there is nothing to derive a file
            # name from and this line raises; confirm callers always provide
            # at least one run.
            merged_split_summary_path = os.path.join(summary_dir, os.path.basename(split_summary_rel_path))
            dictio.write_serialized_object(merged_summary, merged_split_summary_path)
            merged_summary_index[split_name] = merged_split_summary_path

        self.progress.update('Serializing merged split summary index ...')
        dictio.write_serialized_object(
            dictio.strip_prefix_from_dict_values(merged_summary_index, self.output_directory),
            merged_summary_index_path)

        return summary_dir, merged_summary_index_path
Example #2
0
    def store_summarized_profile_for_each_split(self):
        """Write one serialized summary file per split, plus an index of them.

        For every split of every contig in ``self.contigs``, a dict of the form
        ``{self.sample_id: {"coverage": ..., "variability": ...,
        "competing_nucleotides": ...}}`` is serialized into the ``SUMMARY``
        output directory. Files are named after a running split counter,
        zero-padded to six digits (``000001.cp``, ``000002.cp``, ...).

        An index mapping each split name to its summary file is then written to
        ``SUMMARY.cp`` after being passed through
        ``dictio.strip_prefix_from_dict_values`` with ``self.output_directory``
        as the prefix. The summary directory and the index path are reported
        through ``self.run.info``.
        """
        summary_index = {}
        summary_index_output_path = self.generate_output_destination("SUMMARY.cp")
        summary_dir = self.generate_output_destination("SUMMARY", directory=True)
        self.progress.new("Storing summary files")

        # Numbers the output files across all contigs. This is deliberately
        # separate from the contig ordinal used in the progress message: a
        # contig may hold several splits, so the two counts diverge.
        split_counter = 1

        for contig_number, contig in enumerate(self.contigs, start=1):
            self.progress.update(
                "working on contig %s of %s" % (pp(contig_number), pp(len(self.contigs)))
            )
            for split in self.contigs[contig].splits:
                split_summary_path = self.generate_output_destination(
                    os.path.join(summary_dir, "%.6d.cp" % split_counter)
                )
                split_summary = {
                    self.sample_id: {
                        "coverage": split.coverage.c,
                        "variability": split.auxiliary.v,
                        "competing_nucleotides": split.auxiliary.competing_nucleotides,
                    }
                }
                dictio.write_serialized_object(split_summary, split_summary_path)
                summary_index[split.name] = split_summary_path
                split_counter += 1

        self.progress.end()
        self.run.info("profile_summary_dir", summary_dir)
        dictio.write_serialized_object(
            dictio.strip_prefix_from_dict_values(summary_index, self.output_directory),
            summary_index_output_path,
        )
        self.run.info("profile_summary_index", summary_index_output_path)
Example #3
0
    def store_info_dict(self, destination, strip_prefix=None):
        """Serialize ``self.info_dict`` to `destination`.

        When `strip_prefix` is truthy, ``self.info_dict`` is first replaced by
        the result of ``dictio.strip_prefix_from_dict_values`` called with that
        prefix, so the stored object and the in-memory attribute both carry
        the processed values.
        """
        if strip_prefix:
            # Mostly used to drop the output directory prefix from the output
            # file names recorded in the dict; doing it right before writing
            # turned out to be the most convenient place.
            prefix_free_info = dictio.strip_prefix_from_dict_values(self.info_dict, strip_prefix)
            self.info_dict = prefix_free_info

        dictio.write_serialized_object(self.info_dict, destination)
Example #4
0
    def store_info_dict(self, destination, strip_prefix=None):
        """Write ``self.info_dict`` to `destination` as a serialized object.

        If `strip_prefix` is given (truthy), the dict is run through
        ``dictio.strip_prefix_from_dict_values`` with that prefix and the
        result is stored back on ``self.info_dict`` before writing.
        """
        if strip_prefix:
            # The usual purpose is to remove the output_dir prefix from output
            # file names held in the dict; this late point, just before the
            # write, is the best place found for it.
            cleaned_info = dictio.strip_prefix_from_dict_values(self.info_dict, strip_prefix)
            self.info_dict = cleaned_info

        dictio.write_serialized_object(self.info_dict, destination)