def merge_subalignments(self):
    """
    Merge alignment subset extended alignments to get one extended
    alignment per fragment chunk for the current placement subset.

    Side effect: ``self.placement_problem.fragments`` is reassigned to the
    result of ``parent.fragments.get_soft_sub_alignment([])`` and its
    ``seq_names`` is unioned with the fragments of every child problem.

    Returns a list holding one ExtendedAlignment per fragment chunk, in
    chunk order (``self.root_problem.fragment_chunks`` entries).
    """
    placement = self.placement_problem
    _LOG.info("Merging sub-alignments for placement problem : %s." %
              (placement.label))

    # First, find the fragments assigned to this placement problem.
    placement.fragments = \
        placement.parent.fragments.get_soft_sub_alignment([])
    for child in placement.get_children():
        placement.fragments.seq_names |= set(child.fragments)

    # Then, gather the hmmalign results of every alignment subset; each
    # subset maps to a list with one entry per fragment chunk problem.
    chunk_results = {}
    for child in placement.children:
        assert isinstance(child, SeppProblem)
        chunk_results[child] = [
            chunk_problem.get_job_result_by_name('hmmalign')
            for chunk_problem in child.children
        ]

    # Now build one extended alignment per fragment chunk by merging the
    # hmmalign results that every alignment subset produced for chunk i.
    merged = []
    for chunk in range(self.root_problem.fragment_chunks):
        chunk_alignment = ExtendedAlignment(placement.fragments.seq_names)
        for child in placement.children:
            result = chunk_results[child][chunk]
            # A falsy result means there is nothing to read for this
            # chunk, so an empty file list is passed instead.
            fragment_files = [result] if result else []
            child_alignment = \
                child.read_extendend_alignment_and_relabel_columns(
                    child.jobs["hmmbuild"].infile, fragment_files)
            _LOG.debug(
                ("Merging alignment subset into placement subset for "
                 "chunk %d: %s.") % (chunk, child.label))
            chunk_alignment.merge_in(
                child_alignment, convert_to_string=False)
        # Extended alignments have all fragments; remove the ones that
        # don't belong to this chunk.
        chunk_alignment.remove_missing_fragments()
        chunk_alignment.from_bytearray_to_string()
        merged.append(chunk_alignment)
    return merged
def merge_subalignments(self):
    """
    Combine the extended alignments of the alignment subsets into a
    single extended alignment for the current placement subset, once for
    each fragment chunk.

    The placement problem's ``fragments`` attribute is rebuilt here: it is
    set to ``parent.fragments.get_soft_sub_alignment([])`` and then its
    ``seq_names`` receives the fragments of each child problem.

    Returns the list of merged ExtendedAlignment objects, indexed by
    fragment chunk number.
    """
    problem = self.placement_problem
    _LOG.info(
        "Merging sub-alignments for placement problem : %s."
        % (problem.label))

    # Step 1: determine which fragments belong to this placement problem.
    problem.fragments = problem.parent.fragments.get_soft_sub_alignment([])
    for subset in problem.get_children():
        problem.fragments.seq_names |= set(subset.fragments)

    # Step 2: for each alignment subset, collect the 'hmmalign' job result
    # of each of its children (the fragment chunk problems).
    hmmalign_outputs = dict()
    for subset in problem.children:
        assert isinstance(subset, SeppProblem)
        outputs = []
        for fragment_problem in subset.children:
            outputs.append(
                fragment_problem.get_job_result_by_name('hmmalign'))
        hmmalign_outputs[subset] = outputs

    # Step 3: build an extended alignment *per fragment chunk* by merging
    # in what every alignment subset has for that chunk index.
    n_chunks = self.root_problem.fragment_chunks
    per_chunk_alignments = []
    for chunk_index in range(n_chunks):
        combined = ExtendedAlignment(problem.fragments.seq_names)
        for subset in problem.children:
            # Only pass a file along when this subset actually has a
            # (truthy) hmmalign result for the chunk.
            files = []
            output = hmmalign_outputs[subset][chunk_index]
            if output:
                files.append(output)
            hmmbuild_input = subset.jobs["hmmbuild"].infile
            subset_alignment = \
                subset.read_extendend_alignment_and_relabel_columns(
                    hmmbuild_input, files)
            _LOG.debug(
                ("Merging alignment subset into placement subset for "
                 "chunk %d: %s.") % (chunk_index, subset.label))
            combined.merge_in(subset_alignment, convert_to_string=False)
        # The merged alignment was created with all fragment names; drop
        # the ones that are not part of this chunk.
        combined.remove_missing_fragments()
        combined.from_bytearray_to_string()
        per_chunk_alignments.append(combined)
    return per_chunk_alignments