def merge_subalignments(self):
    '''
    Merge alignment subset extended alignments to get one extended alignment
    for the current placement subset.

    Side effect: ``self.placement_problem.fragments`` is replaced by the
    result of ``parent.fragments.get_soft_sub_alignment([])`` and its
    ``seq_names`` is extended with the fragments of every child returned by
    ``get_children()``.

    Returns the merged ExtendedAlignment, after
    ``from_bytearray_to_string()`` has been called on it.
    '''
    pp = self.placement_problem
    _LOG.info(
        "Merging sub-alignments for placement problem : %s." % (pp.label))

    # First assign fragments to the placement problem.
    pp.fragments = pp.parent.fragments.get_soft_sub_alignment([])
    for ap in pp.get_children():
        pp.fragments.seq_names |= set(ap.fragments)

    # Then build an extended alignment by merging all hmmalign results.
    extendedAlignment = ExtendedAlignment(pp.fragments.seq_names)
    for ap in pp.children:
        assert isinstance(ap, SeppProblem)

        # Get all fragment chunk alignments for this alignment subset.
        # Each chunk's hmmalign result is looked up exactly once; chunks
        # whose result is None are skipped.
        chunk_results = (
            fp.get_job_result_by_name('hmmalign') for fp in ap.children)
        aligned_files = [res for res in chunk_results if res is not None]

        _LOG.info(
            "Merging fragment chunks for subalignment : %s." % (ap.label))
        ap_alg = ap.read_extendend_alignment_and_relabel_columns(
            ap.jobs["hmmbuild"].infile, aligned_files)

        _LOG.info(
            "Merging alignment subset into placement subset: %s."
            % (ap.label))
        # Conversion to strings is deferred until every subset is merged.
        extendedAlignment.merge_in(ap_alg, convert_to_string=False)
        # Unbind the per-subset alignment before the next iteration
        # (presumably to release its memory early).
        del ap_alg

    extendedAlignment.from_bytearray_to_string()
    return extendedAlignment
def merge_subalignments(self):
    '''
    Merge alignment subset extended alignments to get one extended alignment
    for the current placement subset.

    Side effect: ``self.placement_problem.fragments`` is replaced by the
    result of ``parent.fragments.get_soft_sub_alignment([])`` and its
    ``seq_names`` is extended with the fragments of every child returned by
    ``get_children()``.

    Returns the merged ExtendedAlignment, after
    ``from_bytearray_to_string()`` has been called on it.
    '''
    pp = self.placement_problem
    _LOG.info(
        "Merging sub-alignments for placement problem : %s." % (pp.label))

    # First assign fragments to the placement problem.
    pp.fragments = pp.parent.fragments.get_soft_sub_alignment([])
    for ap in pp.get_children():
        pp.fragments.seq_names |= set(ap.fragments)

    # Then build an extended alignment by merging all hmmalign results.
    extendedAlignment = ExtendedAlignment(pp.fragments.seq_names)
    for ap in pp.children:
        assert isinstance(ap, SeppProblem)

        # Get all fragment chunk alignments for this alignment subset.
        # Each chunk's hmmalign result is looked up exactly once; chunks
        # whose result is None are skipped.
        chunk_results = (
            fp.get_job_result_by_name('hmmalign') for fp in ap.children)
        aligned_files = [res for res in chunk_results if res is not None]

        _LOG.info(
            "Merging fragment chunks for subalignment : %s." % (ap.label))
        ap_alg = ap.read_extendend_alignment_and_relabel_columns(
            ap.jobs["hmmbuild"].infile, aligned_files)

        _LOG.info(
            "Merging alignment subset into placement subset: %s."
            % (ap.label))
        # Conversion to strings is deferred until every subset is merged.
        extendedAlignment.merge_in(ap_alg, convert_to_string=False)
        # Unbind the per-subset alignment before the next iteration
        # (presumably to release its memory early).
        del ap_alg

    extendedAlignment.from_bytearray_to_string()
    return extendedAlignment
def merge_subalignments(self):
    '''
    Build one extended alignment per fragment chunk for the current
    placement subset, each obtained by merging the alignment subsets'
    hmmalign results for that chunk.

    Side effect: ``self.placement_problem.fragments`` is replaced by the
    result of ``parent.fragments.get_soft_sub_alignment([])`` and its
    ``seq_names`` is extended with the fragments of every child returned by
    ``get_children()``.

    Returns a list with one ExtendedAlignment per chunk index in
    ``range(self.root_problem.fragment_chunks)``.
    '''
    placement = self.placement_problem
    _LOG.info(
        "Merging sub-alignments for placement problem : %s."
        % (placement.label))

    # First find the fragments assigned to this placement problem.
    placement.fragments = \
        placement.parent.fragments.get_soft_sub_alignment([])
    for subset in placement.get_children():
        placement.fragments.seq_names |= set(subset.fragments)

    # Then gather, per alignment subset, the hmmalign result of each of its
    # fragment chunks (entries are kept even when falsy, so positions line
    # up with the chunk index used below).
    chunk_files_by_subset = {}
    for subset in placement.children:
        assert isinstance(subset, SeppProblem)
        chunk_files_by_subset[subset] = [
            chunk.get_job_result_by_name('hmmalign')
            for chunk in subset.children
        ]

    # Now build an extended alignment *per fragment chunk* by merging every
    # subset's hmmalign result for the chunk with that index.
    merged_per_chunk = []
    for chunk_index in range(self.root_problem.fragment_chunks):
        merged = ExtendedAlignment(placement.fragments.seq_names)
        for subset in placement.children:
            chunk_file = chunk_files_by_subset[subset][chunk_index]
            # A falsy result means no file is passed for this subset/chunk.
            files_to_read = [chunk_file] if chunk_file else []
            subset_alignment = \
                subset.read_extendend_alignment_and_relabel_columns(
                    subset.jobs["hmmbuild"].infile, files_to_read)
            _LOG.debug(
                ("Merging alignment subset into placement subset for "
                 "chunk %d: %s.") % (chunk_index, subset.label))
            merged.merge_in(subset_alignment, convert_to_string=False)

        # The extended alignment was created with all fragments; remove the
        # ones that do not belong to this chunk.
        merged.remove_missing_fragments()
        merged.from_bytearray_to_string()
        merged_per_chunk.append(merged)

    return merged_per_chunk
def merge_subalignments(self):
    '''
    Build one extended alignment per fragment chunk for the current
    placement subset, each obtained by merging the alignment subsets'
    hmmalign results for that chunk.

    Side effect: ``self.placement_problem.fragments`` is replaced by the
    result of ``parent.fragments.get_soft_sub_alignment([])`` and its
    ``seq_names`` is extended with the fragments of every child returned by
    ``get_children()``.

    Returns a list with one ExtendedAlignment per chunk index in
    ``range(self.root_problem.fragment_chunks)``.
    '''
    placement = self.placement_problem
    _LOG.info(
        "Merging sub-alignments for placement problem : %s."
        % (placement.label))

    # First find the fragments assigned to this placement problem.
    placement.fragments = \
        placement.parent.fragments.get_soft_sub_alignment([])
    for subset in placement.get_children():
        placement.fragments.seq_names |= set(subset.fragments)

    # Then gather, per alignment subset, the hmmalign result of each of its
    # fragment chunks (entries are kept even when falsy, so positions line
    # up with the chunk index used below).
    chunk_files_by_subset = {}
    for subset in placement.children:
        assert isinstance(subset, SeppProblem)
        chunk_files_by_subset[subset] = [
            chunk.get_job_result_by_name('hmmalign')
            for chunk in subset.children
        ]

    # Now build an extended alignment *per fragment chunk* by merging every
    # subset's hmmalign result for the chunk with that index.
    merged_per_chunk = []
    for chunk_index in range(self.root_problem.fragment_chunks):
        merged = ExtendedAlignment(placement.fragments.seq_names)
        for subset in placement.children:
            chunk_file = chunk_files_by_subset[subset][chunk_index]
            # A falsy result means no file is passed for this subset/chunk.
            files_to_read = [chunk_file] if chunk_file else []
            subset_alignment = \
                subset.read_extendend_alignment_and_relabel_columns(
                    subset.jobs["hmmbuild"].infile, files_to_read)
            _LOG.debug(
                ("Merging alignment subset into placement subset for "
                 "chunk %d: %s.") % (chunk_index, subset.label))
            merged.merge_in(subset_alignment, convert_to_string=False)

        # The extended alignment was created with all fragments; remove the
        # ones that do not belong to this chunk.
        merged.remove_missing_fragments()
        merged.from_bytearray_to_string()
        merged_per_chunk.append(merged)

    return merged_per_chunk
def merge_results(self):
    '''
    Merge alignment subset extended alignments to get one extended alignment
    for the single placement subset, and store it in ``self.results``.

    Side effect: the placement problem's ``fragments`` attribute is replaced
    by the result of ``parent.fragments.get_soft_sub_alignment([])`` and its
    ``seq_names`` is extended with the fragments of every child returned by
    ``get_children()``.

    Returns None; the merged ExtendedAlignment is assigned to
    ``self.results`` after ``from_bytearray_to_string()`` has been called.

    Raises AssertionError when the root problem does not have exactly one
    child (only while assertions are enabled).
    '''
    assert len(self.root_problem.get_children()) == 1, \
        "Currently UPP works with only one placement subset."

    pp = self.root_problem.get_children()[0]
    _LOG.info(
        "Merging sub-alignments for placement problem : %s." % pp.label)

    # First assign fragments to the placement problem.
    pp.fragments = pp.parent.fragments.get_soft_sub_alignment([])
    for ap in pp.get_children():
        pp.fragments.seq_names |= set(ap.fragments)

    # Then build an extended alignment by merging all hmmalign results.
    _LOG.debug(
        "fragments are %d:\n %s"
        % (len(pp.fragments.seq_names), pp.fragments.seq_names))
    extendedAlignment = ExtendedAlignment(pp.fragments.seq_names)
    for ap in pp.children:
        assert isinstance(ap, SeppProblem)

        # Get all fragment chunk alignments for this alignment subset.
        # Each chunk's hmmalign result is looked up exactly once; chunks
        # whose result is None are skipped.
        chunk_results = (
            fp.get_job_result_by_name('hmmalign') for fp in ap.children)
        aligned_files = [res for res in chunk_results if res is not None]

        _LOG.debug(
            "Merging fragment chunks for subalignment : %s." % ap.label)
        ap_alg = ap.read_extendend_alignment_and_relabel_columns(
            ap.jobs["hmmbuild"].infile, aligned_files)

        _LOG.debug(
            "Merging alignment subset into placement subset: %s."
            % ap.label)
        # Conversion to strings is deferred until every subset is merged.
        extendedAlignment.merge_in(ap_alg, convert_to_string=False)

    extendedAlignment.from_bytearray_to_string()
    self.results = extendedAlignment