예제 #1
0
 def merge_subalignments(self):
     '''
     Combine the extended alignments of all alignment subsets below the
     current placement problem into a single extended alignment.

     Side effect: ``self.placement_problem.fragments`` is replaced by the
     result of ``get_soft_sub_alignment([])`` on the parent's fragments, and
     its ``seq_names`` is grown with the fragments of every child subset.

     Returns the merged ExtendedAlignment, after
     ``from_bytearray_to_string()`` has been called on it.
     '''
     place = self.placement_problem
     _LOG.info(
         "Merging sub-alignments for placement problem : %s." % (place.label))

     # Step 1: the placement problem's fragments are the union of the
     # fragments assigned to its alignment subsets.
     place.fragments = place.parent.fragments.get_soft_sub_alignment([])
     for subset in place.get_children():
         place.fragments.seq_names |= set(subset.fragments)

     # Step 2: fold the hmmalign output of every subset into one alignment.
     merged = ExtendedAlignment(place.fragments.seq_names)
     for subset in place.children:
         assert isinstance(subset, SeppProblem)
         # Gather the hmmalign result of each fragment chunk, skipping
         # chunks for which no result is available.
         chunk_results = []
         for chunk in subset.children:
             if chunk.get_job_result_by_name('hmmalign') is None:
                 continue
             chunk_results.append(chunk.get_job_result_by_name('hmmalign'))
         _LOG.info(
             "Merging fragment chunks for subalignment : %s." % (subset.label))
         subset_alg = subset.read_extendend_alignment_and_relabel_columns(
             subset.jobs["hmmbuild"].infile, chunk_results)
         _LOG.info(
             "Merging alignment subset into placement subset: %s."
             % (subset.label))
         # String conversion is deferred (convert_to_string=False) and done
         # once for the whole merged alignment after the loop.
         merged.merge_in(subset_alg, convert_to_string=False)
         # Drop the reference before the next subset is read.
         del subset_alg
     merged.from_bytearray_to_string()
     return merged
예제 #2
0
파일: exhaustive.py 프로젝트: khanf2/sepp
    def merge_subalignments(self):
        '''
        Produce one extended alignment for the current placement subset by
        merging the extended alignments of its alignment subsets.

        Side effect: ``self.placement_problem.fragments`` is reassigned to the
        result of ``get_soft_sub_alignment([])`` on the parent's fragments and
        its ``seq_names`` is updated with all child-subset fragments.

        Returns the merged ExtendedAlignment, after
        ``from_bytearray_to_string()`` has been called on it.
        '''
        placement = self.placement_problem
        _LOG.info(
            "Merging sub-alignments for placement problem : %s."
            % (placement.label))

        # Assign fragments to the placement problem: collect the fragments of
        # all alignment subsets first, then add them in a single update.
        parent_frags = placement.parent.fragments
        placement.fragments = parent_frags.get_soft_sub_alignment([])
        collected = [
            frag
            for subset in placement.get_children()
            for frag in subset.fragments
        ]
        placement.fragments.seq_names.update(collected)

        # Build the extended alignment by merging all hmmalign results.
        merged = ExtendedAlignment(placement.fragments.seq_names)
        for subset in placement.children:
            assert isinstance(subset, SeppProblem)
            # hmmalign outputs of this subset's fragment chunks; chunks
            # without a result are left out.
            hmmalign_outputs = []
            for chunk in subset.children:
                if chunk.get_job_result_by_name('hmmalign') is not None:
                    hmmalign_outputs.append(
                        chunk.get_job_result_by_name('hmmalign'))
            _LOG.info(
                "Merging fragment chunks for subalignment : %s."
                % (subset.label))
            backbone_file = subset.jobs["hmmbuild"].infile
            subset_alignment = \
                subset.read_extendend_alignment_and_relabel_columns(
                    backbone_file, hmmalign_outputs)
            _LOG.info(
                "Merging alignment subset into placement subset: %s."
                % (subset.label))
            merged.merge_in(subset_alignment, convert_to_string=False)
            # Release the per-subset alignment before reading the next one.
            del subset_alignment

        # Conversion was deferred during merging; do it once here.
        merged.from_bytearray_to_string()
        return merged
예제 #3
0
파일: exhaustive.py 프로젝트: smirarab/sepp
    def merge_subalignments(self):
        '''
        Build one extended alignment per fragment chunk for the current
        placement subset.

        Side effect: ``self.placement_problem.fragments`` is reassigned to the
        result of ``get_soft_sub_alignment([])`` on the parent's fragments and
        its ``seq_names`` is grown with the fragments of every child subset.

        Returns a list with ``self.root_problem.fragment_chunks`` entries; the
        entry at index i is the ExtendedAlignment merged from the hmmalign
        results of fragment chunk i across all alignment subsets.
        '''
        place = self.placement_problem
        _LOG.info(
            "Merging sub-alignments for placement problem : %s."
            % (place.label))

        # Fragments of this placement problem: union over its subsets.
        place.fragments = place.parent.fragments.get_soft_sub_alignment([])
        for subset in place.get_children():
            place.fragments.seq_names |= set(subset.fragments)

        # hmmalign results of every alignment subset, one entry per fragment
        # chunk (an entry may be falsy when there is no result).
        hmmalign_by_subset = {}
        for subset in place.children:
            assert isinstance(subset, SeppProblem)
            hmmalign_by_subset[subset] = [
                chunk.get_job_result_by_name('hmmalign')
                for chunk in subset.children
            ]

        # For fragment chunk number i, merge the i-th hmmalign result of
        # every alignment subset into one extended alignment.
        merged_per_chunk = []
        for chunk_idx in range(self.root_problem.fragment_chunks):
            merged = ExtendedAlignment(place.fragments.seq_names)
            for subset in place.children:
                chunk_file = hmmalign_by_subset[subset][chunk_idx]
                # With no result for this chunk, read with an empty list of
                # extension files.
                extension_files = [chunk_file] if chunk_file else []
                read_and_relabel = \
                    subset.read_extendend_alignment_and_relabel_columns
                subset_alg = read_and_relabel(
                    subset.jobs["hmmbuild"].infile, extension_files)
                _LOG.debug(
                    ("Merging alignment subset into placement subset for "
                     "chunk %d: %s.") % (chunk_idx, subset.label))
                merged.merge_in(subset_alg, convert_to_string=False)
            # The merged alignment was created with all fragment names;
            # remove the ones that do not belong to this chunk.
            merged.remove_missing_fragments()
            merged.from_bytearray_to_string()
            merged_per_chunk.append(merged)
        return merged_per_chunk
예제 #4
0
 def merge_subalignments(self):
     '''
     Merge the alignment-subset extended alignments of the current placement
     subset, producing one extended alignment per fragment chunk.

     Side effect: ``self.placement_problem.fragments`` is reassigned to the
     result of ``get_soft_sub_alignment([])`` on the parent's fragments and
     its ``seq_names`` is grown with the fragments of every child subset.

     Returns a list of ExtendedAlignment objects, one for each index in
     ``range(self.root_problem.fragment_chunks)``.
     '''
     placement = self.placement_problem
     _LOG.info(
         "Merging sub-alignments for placement problem : %s."
         % (placement.label))

     # Find the fragments assigned to this placement problem.
     parent_frags = placement.parent.fragments
     placement.fragments = parent_frags.get_soft_sub_alignment([])
     for child in placement.get_children():
         placement.fragments.seq_names |= set(child.fragments)

     # Map each alignment subset to the hmmalign results of its fragment
     # chunks (entries may be falsy when no result exists).
     hmm_outputs = {}
     for child in placement.children:
         assert isinstance(child, SeppProblem)
         per_chunk = []
         for chunk in child.children:
             per_chunk.append(chunk.get_job_result_by_name('hmmalign'))
         hmm_outputs[child] = per_chunk

     # One merged extended alignment per fragment chunk index.
     results = []
     chunk_total = self.root_problem.fragment_chunks
     for idx in range(chunk_total):
         chunk_alignment = ExtendedAlignment(placement.fragments.seq_names)
         for child in placement.children:
             extension = hmm_outputs[child][idx]
             # An empty extension list is used when this subset has no
             # hmmalign result for the chunk.
             child_alg = child.read_extendend_alignment_and_relabel_columns(
                 child.jobs["hmmbuild"].infile,
                 [extension] if extension else [])
             _LOG.debug(
                 ("Merging alignment subset into placement subset for "
                  "chunk %d: %s.") % (idx, child.label))
             chunk_alignment.merge_in(child_alg, convert_to_string=False)
         # The alignment starts out with all fragment names; remove those
         # that do not belong to this chunk before converting.
         chunk_alignment.remove_missing_fragments()
         chunk_alignment.from_bytearray_to_string()
         results.append(chunk_alignment)
     return results
예제 #5
0
    def merge_results(self):
        '''
        Merge the per-subset, per-chunk placement outputs of the root problem.

        Two things happen:

        1. Every pickled extended alignment referenced by the placement jobs
           is loaded and merged into one ExtendedAlignment, which is stored
           in ``self.results``.
        2. A text input for the merge job is assembled (main tree, then for
           each available placement result the subset tree and the job
           result) and the job returned by ``self.get_merge_job`` is run.

        Returns None.
        '''
        assert isinstance(self.root_problem, SeppProblem)
        root = self.root_problem

        # Generate a single extended alignment.
        fullExtendedAlignment = ExtendedAlignment(root.fragments.keys())
        for pp in root.get_children():
            for i in range(root.fragment_chunks):
                alignment_path = pp.jobs[
                    get_placement_job_name(i)].full_extended_alignment_file
                # The context manager closes the file even when unpickling
                # fails (the previous open()/close() pair leaked the handle
                # on error).
                # NOTE(review): pickle.load can execute arbitrary code from
                # the file; this assumes the file was written by this
                # pipeline -- confirm.
                with open(alignment_path, 'rb') as align_input:
                    extended_alignment = pickle.load(align_input)
                fullExtendedAlignment.merge_in(
                    extended_alignment, convert_to_string=True)
        self.results = fullExtendedAlignment

        # The merge input starts with the main tree.
        mergeinput = [
            "%s;" % (root.subtree.compose_newick(labels=True))]
        for pp in root.get_children():
            assert isinstance(pp, SeppProblem)
            for i in range(root.fragment_chunks):
                placement_result = pp.get_job_result_by_name(
                    get_placement_job_name(i))
                if placement_result is None:
                    # No placement result for this chunk; nothing to merge.
                    continue
                # Append the subset tree and the placement job result.
                mergeinput.append(
                    "%s;\n%s" % (
                        pp.subtree.compose_newick(labels=True),
                        placement_result))
        # Two empty entries make the joined text end with "\n\n".
        mergeinput.extend(["", ""])
        merge_input_string = "\n".join(mergeinput)
        mergeJsonJob = self.get_merge_job(merge_input_string)
        mergeJsonJob.run()
예제 #6
0
    def merge_results(self):
        '''
        Merge the placement outputs of all placement subsets and chunks.

        First, every pickled extended alignment referenced by the placement
        jobs is loaded and merged into a single ExtendedAlignment that is
        stored in ``self.results``. Then the text input for the merge job is
        built (main tree followed by, for each available placement result,
        the subset tree and the job result) and the job obtained from
        ``self.get_merge_job`` is run.

        Returns None.
        '''
        assert isinstance(self.root_problem, RootProblem)
        root_problem = self.root_problem
        chunk_count = root_problem.fragment_chunks

        # Generate a single extended alignment.
        full_alignment = ExtendedAlignment(root_problem.fragments.keys())
        for pp in root_problem.get_children():
            for i in range(chunk_count):
                placement_job = pp.jobs[get_placement_job_name(i)]
                # Use a context manager so the file handle is released even
                # if pickle.load raises; the earlier explicit close() was
                # skipped on error.
                # NOTE(review): unpickling is only safe for trusted files;
                # presumably these are produced by the placement jobs
                # themselves -- confirm.
                with open(placement_job.full_extended_alignment_file,
                          'rb') as align_input:
                    extended_alignment = pickle.load(align_input)
                full_alignment.merge_in(
                    extended_alignment, convert_to_string=True)
        self.results = full_alignment

        # Main tree first, then one "<subset tree>;\n<job result>" entry
        # per placement result that is present.
        merge_lines = [
            "%s;" % (root_problem.subtree.compose_newick(labels=True))]
        for pp in root_problem.get_children():
            assert isinstance(pp, SeppProblem)
            for i in range(chunk_count):
                json_location = pp.get_job_result_by_name(
                    get_placement_job_name(i))
                if json_location is None:
                    # This chunk produced no placement result; skip it.
                    continue
                merge_lines.append(
                    "%s;\n%s" % (
                        pp.subtree.compose_newick(labels=True),
                        json_location))
        # Two trailing empty entries: the joined text ends with "\n\n".
        merge_lines.append("")
        merge_lines.append("")
        merge_input_string = "\n".join(merge_lines)
        merge_json_job = self.get_merge_job(merge_input_string)
        merge_json_job.run()
예제 #7
0
    def merge_results(self):
        '''
        Merge the extended alignments of all alignment subsets under the
        single placement subset and store the result in ``self.results``.

        Asserts that the root problem has exactly one child placement subset.

        Side effect: the placement subset's ``fragments`` is reassigned to the
        result of ``get_soft_sub_alignment([])`` on its parent's fragments and
        its ``seq_names`` is grown with the fragments of every child subset.

        Returns None.
        '''
        assert len(self.root_problem.get_children()) == 1, \
            "Currently UPP works with only one placement subset."
        place = self.root_problem.get_children()[0]
        _LOG.info(
            "Merging sub-alignments for placement problem : %s."
            % (place.label))

        # Assign fragments to the placement problem: union over all
        # alignment subsets.
        place.fragments = place.parent.fragments.get_soft_sub_alignment([])
        for subset in place.get_children():
            place.fragments.seq_names |= set(subset.fragments)

        # Build the extended alignment by merging all hmmalign results.
        _LOG.debug(
            "fragments are %d:\n %s"
            % (len(place.fragments.seq_names), place.fragments.seq_names))
        merged = ExtendedAlignment(place.fragments.seq_names)
        for subset in place.children:
            assert isinstance(subset, SeppProblem)
            # hmmalign result of each fragment chunk of this subset; chunks
            # without a result are skipped.
            chunk_results = []
            for chunk in subset.children:
                if chunk.get_job_result_by_name('hmmalign') is None:
                    continue
                chunk_results.append(
                    chunk.get_job_result_by_name('hmmalign'))
            _LOG.debug(
                "Merging fragment chunks for subalignment : %s."
                % (subset.label))
            subset_alg = subset.read_extendend_alignment_and_relabel_columns(
                subset.jobs["hmmbuild"].infile, chunk_results)
            _LOG.debug(
                "Merging alignment subset into placement subset: %s."
                % (subset.label))
            merged.merge_in(subset_alg, convert_to_string=False)

        # Conversion was deferred while merging; do it once at the end.
        merged.from_bytearray_to_string()
        self.results = merged
예제 #8
0
    def testExtendedAlignment(self):
        '''
        Build two ExtendedAlignment objects from complementary subsets of the
        backbone alignment, merge them, and check the merged result.

        Checks performed:
        * the two written sub-alignments contain no all-gap column;
        * the merged alignment is aligned;
        * its length and insertion-column count match what the two inputs
          imply;
        * its base alignment, after removing all-gap columns, equals the
          original alignment.

        Reads fixtures from data/simulated/ and reads/writes files under
        data/tmp/ (the .sto extension files are read, not created, here).
        '''
        # Single-argument print(...) gives the same output with the Python 2
        # print statement used elsewhere in this file.
        print("======= starting testExtendedAlignment =========")

        subset = [
            "SFIF", "SFII", "SCFC", "SGHD", "SDCC", "SBGE", "SFBB", "SDI",
            "SCGB", "SJGF", "SGBI", "SCJA", "SGAD", "SHEB", "SFHB", "SDJI",
            "SHED", "SJJJ", "SBBE", "SCCH", "SDJB", "SDAC", "SHEH", "SFDC",
            "SFEI", "SHHB", "SC", "SIAB", "SDDI", "SBCB", "SJB", "SEBD",
            "SFGD", "SHA", "SIDA", "SGHI", "SGIB", "SBFJ", "SFIE", "SCJF",
            "SJHJ", "SJBG", "SEJI", "SFFF", "SJ", "SIII", "SJHH", "SEIH",
            "SBDC", "SHDJ", "SJDD", "SGDB", "SIHA", "SIBB", "SECC", "SCAD",
            "SGBB", "SGIF", "SJHC", "SFCD", "SEAA", "SEFF", "SDFG", "SDJE",
            "SCFG", "SFH", "SCJ", "SDDD", "SEGD", "SCIH", "SDAG", "SCJE",
            "SFAJ", "SIDJ", "SE", "SHBC", "SJFF", "SCHD", "SBHA", "SEDF",
            "SFAF", "SEDD", "SDHD", "SGJD", "SIBH", "SGDF", "SIFA", "SJGA",
            "SIJB", "SFI", "SGA", "SBFC", "SBJA", "SFFC", "SFDH", "SFEE",
            "SBDF", "SGBJ", "SDHE", "SJIB", "SHHI", "SIDE", "SJII"
        ]

        # Backbone alignment with all-gap columns removed; tlen is its length.
        alg = MutableAlignment()
        alg.read_filepath("data/simulated/test.fasta")
        alg.delete_all_gap()
        tlen = alg.get_length()

        frg = MutableAlignment()
        frg.read_filepath("data/simulated/test.fas")

        # Parent problem over the whole backbone, holding all fragments.
        pp = SeppProblem(alg.keys())
        pp.fragments = frg
        pp.subalignment = alg

        # Two child problems: `subset` and every remaining backbone sequence.
        cp1 = SeppProblem(subset, pp)
        cp2 = SeppProblem(list(set(alg.keys()) - set(subset)), pp)
        # Fragments are split by the last character of their name:
        # digit >= 9 goes to cp1, digit <= 1 goes to cp2.
        cp1.fragments = ReadonlySubalignment(
            [k for k in frg.keys() if int(k[-1]) >= 9], frg)
        cp2.fragments = ReadonlySubalignment(
            [k for k in frg.keys() if int(k[-1]) <= 1], frg)

        cp1labels = cp1.write_subalignment_without_allgap_columns(
            "data/tmp/cp1.fasta")
        cp2labels = cp2.write_subalignment_without_allgap_columns(
            "data/tmp/cp2.fasta")
        # The written sub-alignments must not contain any all-gap column.
        for written in ("data/tmp/cp1.fasta", "data/tmp/cp2.fasta"):
            tmp = MutableAlignment().read_filepath(written)
            assert all(
                not tmp.is_all_gap(pos)
                for pos in xrange(0, tmp.get_length()))

        cp1.fragments.write_to_path("data/tmp/cp1.frags.fas")
        cp2.fragments.write_to_path("data/tmp/cp2.frags.fas")

        # hmmalign was run beforehand (per the original note); the
        # *.extended.sto files below are read as its output.
        ext1 = ExtendedAlignment(cp1.fragments)
        ext1.build_extended_alignment("data/tmp/cp1.fasta",
                                      "data/tmp/cp1.extended.sto")
        ext1.relabel_original_columns(cp1labels)
        ext2 = ExtendedAlignment(cp2.fragments)
        ext2.build_extended_alignment("data/tmp/cp2.fasta",
                                      "data/tmp/cp2.extended.sto")
        ext2.relabel_original_columns(cp2labels)

        extmerger = ExtendedAlignment([])
        extmerger.merge_in(ext1)
        # The value returned by the second merge is used below as the number
        # of "mixed" insertion columns (presumably insertion columns shared
        # by both inputs -- confirm against ExtendedAlignment.merge_in).
        mixed = extmerger.merge_in(ext2)

        extmerger.write_to_path("data/tmp/extended.merged.fasta")

        assert extmerger.is_aligned(), "Merged alignment is not aligned"
        # Columns with a negative label are counted as insertion columns.
        in1 = len([x for x in ext1._col_labels if x < 0])
        in2 = len([x for x in ext2._col_labels if x < 0])
        merged_len = extmerger.get_length()
        print("Merged:%d. Insertion1:%d Insertion2:%d BaseLen:%d" % (
            merged_len, in1, in2, tlen))
        assert (in1 + in2 + tlen - mixed) == merged_len, (
            "Lengths don't match up after merging. Merged:%d. "
            "Insertion1:%d Insertion2:%d BaseLen:%d Mixed-insertion: %d" % (
                merged_len, in1, in2, tlen, mixed))
        merged_insertions = len(list(extmerger.iter_insertion_columns()))
        # Fix: the message used to format in1 twice, so "Insertion2"
        # reported the wrong number on failure.
        assert (in1 + in2 - mixed) == merged_insertions, (
            "Columns are not correctly labeled after merging. "
            "Merged insertion count:%d. Insertion1:%d Insertion2:%d "
            "Mixed-insertion: %d" % (merged_insertions, in1, in2, mixed))

        # The base (non-insertion) part of the merged alignment must equal
        # the original alignment once all-gap columns are removed.
        tmp = extmerger.get_base_readonly_alignment().get_mutable_alignment()
        tmp.delete_all_gap()
        assert tmp.is_aligned(), "merged alignment should be aligned!"
        assert tmp.get_length() == tlen, "merged alignment has wrong length"
        assert all([alg[k] == s for (k, s) in tmp.items()
                    ]), "merged alignment should match original alignment"

        print("======= finished testExtendedAlignment =========")
예제 #9
0
# Script fragment (Python 2: print statement, iteritems). For every directory
# in `dirs` it builds a SeppProblem from the files found there, reads the
# hmmalign results into an extended alignment and merges that into the
# `extendedAlignment` accumulator; afterwards the merged alignment is written
# unmasked and, after remove_insertion_columns(), masked.
# `dirs`, `original_backbone` and `extendedAlignment` are defined outside
# this fragment.
# NOTE(review): the loop variables `dir` and `file` shadow Python builtins.
for dir in dirs:
  print "Working on %s\n" % dir
  # NOTE(review): the first two patterns put nothing between `dir` and
  # "FC_*", while the third inserts "/"; presumably `dir` is expected to end
  # with a path separator -- confirm.
  aligned_files = glob.glob('%sFC_*/hmmalign.results.*' % dir)
  sequence_files = glob.glob('%sFC_*/hmmalign.frag.*' % dir)
  base_alignment_file = glob.glob('%s/*.fasta' % dir)
  base_alignment = MutableAlignment()
  # Only the first matching .fasta file is read (IndexError if none match).
  done = base_alignment.read_filepath(base_alignment_file[0])
  # Restrict the original backbone to the sequences of this directory's
  # base alignment.
  subbackbone = original_backbone.get_soft_sub_alignment(base_alignment.get_sequence_names())
  frags = MutableAlignment()
  sequence_names = []
  for file in sequence_files:
    seq = MutableAlignment()
    done = seq.read_filepath(file)
    # list.extend returns None, so `done` is None after this line.
    done = sequence_names.extend(seq.get_sequence_names())
    # NOTE(review): the loop target rebinds `seq` while iterating over the
    # alignment held in `seq`; the iterator is created before the rebinding.
    for name, seq in seq.iteritems():
      # Fragments are stored upper-cased.
      frags[name] = seq.upper()
  problem = SeppProblem(sequence_names)  
  problem.set_subalignment(subbackbone)

  # Record the value returned by delete_all_gap() on a mutable copy of the
  # sub-alignment under "ref.alignment.columns".
  mut_subalg = problem.subalignment.get_mutable_alignment()
  remaining_cols = mut_subalg.delete_all_gap()        
  problem.annotations["ref.alignment.columns"] = remaining_cols
  problem.fragments = frags
  # NOTE(review): `base_alignment_file` is the list returned by glob.glob,
  # whereas read_filepath above is given its first element; verify that a
  # list is what this method expects here.
  ap_alg = problem.read_extendend_alignment_and_relabel_columns\
                          (base_alignment_file, aligned_files)
  extendedAlignment.merge_in(ap_alg,convert_to_string=False)                        
                        
# Write the merged alignment as is, then again after removing insertion
# columns. The output paths are hard-coded.
extendedAlignment.write_to_path("/projects/sate8/namphuon/ultra_large/1000000/upp_100_10_new/upp.unmasked.fasta")
extendedAlignment.remove_insertion_columns()
extendedAlignment.write_to_path("/projects/sate8/namphuon/ultra_large/1000000/upp_100_10_new/upp.masked.fasta")
예제 #10
0
    def merge_results(self):
        '''
        Merge the placement outputs of all placement subsets and chunks.

        The "full_extended_alignment_object" attribute of every placement job
        is merged into one ExtendedAlignment stored in ``self.results``.

        The placement JSON is then produced in one of two ways:
        * exactly one placement subset: the JSON result files of its chunks
          are loaded, their "placements" lists concatenated onto the first
          document, and the result dumped to the "placement.json" output
          file;
        * otherwise: a text input (main tree, then subset tree and job
          result per available placement) is handed to a MergeJsonJob
          writing to the same output file.

        Returns None.
        '''
        root = self.root_problem
        assert isinstance(root, SeppProblem)

        # Generate the single extended alignment.
        combined = ExtendedAlignment(root.fragments.keys())
        for subset in root.get_children():
            for chunk in range(root.fragment_chunks):
                placement_job = subset.jobs[get_placement_job_name(chunk)]
                combined.merge_in(
                    placement_job.get_attribute(
                        "full_extended_alignment_object"),
                    convert_to_string=True)
        self.results = combined

        if len(root.get_children()) != 1:
            # Several placement subsets: delegate to MergeJsonJob. The
            # input starts with the main tree.
            merge_lines = [
                "%s;" % (root.subtree.compose_newick(labels=True))]
            for subset in root.get_children():
                assert isinstance(subset, SeppProblem)
                for chunk in range(root.fragment_chunks):
                    if subset.get_job_result_by_name(
                            get_placement_job_name(chunk)) is not None:
                        # Subset tree plus the placement job result.
                        merge_lines.append(
                            "%s;\n%s" % (
                                subset.subtree.compose_newick(labels=True),
                                subset.get_job_result_by_name(
                                    get_placement_job_name(chunk))))
            merge_lines.append("")
            merge_lines.append("")
            merge_text = "\n".join(merge_lines)
            _LOG.debug(merge_lines)
            merge_job = MergeJsonJob()
            merge_job.setup(merge_text,
                            self.get_output_filename("placement.json"))
            merge_job.run()
            return

        # Only one placement subset: merge the JSON documents directly
        # instead of going through the Java-based merge (per original note).
        import json
        chunk_docs = []
        for subset in root.get_children():
            assert isinstance(subset, SeppProblem)
            for chunk in range(root.fragment_chunks):
                if subset.get_job_result_by_name(
                        get_placement_job_name(chunk)) is not None:
                    # The job result is used as the path of a JSON file.
                    with open(subset.get_job_result_by_name(
                            get_placement_job_name(chunk))) as json_file:
                        chunk_docs.append(json.load(json_file))
            _LOG.info(
                "There are %d fragment chunks on a single placement subset"
                % len(chunk_docs))
        # The first document is the base; the "placements" of the others
        # are appended to it in order.
        result = chunk_docs[0]
        for extra in chunk_docs[1:]:
            result["placements"] = result["placements"] + extra["placements"]
        with open(self.get_output_filename("placement.json"), 'w') as out:
            json.dump(result, out, sort_keys=True, indent=4)
예제 #11
0
    def testExtendedAlignment(self):
        '''
        Merge two ExtendedAlignment objects built from complementary backbone
        subsets and verify the merged alignment.

        The test asserts that the written sub-alignments have no all-gap
        column, that the merged alignment is aligned, that its length and
        number of insertion columns are consistent with the two inputs, and
        that its base alignment (all-gap columns removed) equals the original
        alignment.

        Reads fixtures from data/simulated/ and reads/writes files under
        data/tmp/ (the .sto extension files are read, not created, here).
        '''
        # Single-argument print(...) gives the same output with the Python 2
        # print statement used elsewhere in this file.
        print("======= starting testExtendedAlignment =========")

        subset = [
            "SFIF", "SFII", "SCFC", "SGHD", "SDCC", "SBGE", "SFBB", "SDI",
            "SCGB", "SJGF", "SGBI", "SCJA", "SGAD", "SHEB", "SFHB", "SDJI",
            "SHED", "SJJJ", "SBBE", "SCCH", "SDJB", "SDAC", "SHEH", "SFDC",
            "SFEI", "SHHB", "SC", "SIAB", "SDDI", "SBCB", "SJB", "SEBD",
            "SFGD", "SHA", "SIDA", "SGHI", "SGIB", "SBFJ", "SFIE", "SCJF",
            "SJHJ", "SJBG", "SEJI", "SFFF", "SJ", "SIII", "SJHH", "SEIH",
            "SBDC", "SHDJ", "SJDD", "SGDB", "SIHA", "SIBB", "SECC", "SCAD",
            "SGBB", "SGIF", "SJHC", "SFCD", "SEAA", "SEFF", "SDFG", "SDJE",
            "SCFG", "SFH", "SCJ", "SDDD", "SEGD", "SCIH", "SDAG", "SCJE",
            "SFAJ", "SIDJ", "SE", "SHBC", "SJFF", "SCHD", "SBHA", "SEDF",
            "SFAF", "SEDD", "SDHD", "SGJD", "SIBH", "SGDF", "SIFA", "SJGA",
            "SIJB", "SFI", "SGA", "SBFC", "SBJA", "SFFC", "SFDH", "SFEE",
            "SBDF", "SGBJ", "SDHE", "SJIB", "SHHI", "SIDE", "SJII",
        ]

        # Backbone alignment without all-gap columns; tlen is its length.
        alg = MutableAlignment()
        alg.read_filepath("data/simulated/test.fasta")
        alg.delete_all_gap()
        tlen = alg.get_length()

        frg = MutableAlignment()
        frg.read_filepath("data/simulated/test.fas")

        # Parent problem over the full backbone, holding every fragment.
        pp = SeppProblem(alg.keys())
        pp.fragments = frg
        pp.subalignment = alg

        # Children: `subset` and the complement of `subset` in the backbone.
        cp1 = SeppProblem(subset, pp)
        cp2 = SeppProblem(list(set(alg.keys()) - set(subset)), pp)
        # Fragment names are split on their last character:
        # digit >= 9 goes to cp1, digit <= 1 goes to cp2.
        cp1.fragments = ReadonlySubalignment(
            [k for k in frg.keys() if int(k[-1]) >= 9], frg)
        cp2.fragments = ReadonlySubalignment(
            [k for k in frg.keys() if int(k[-1]) <= 1], frg)

        cp1labels = cp1.write_subalignment_without_allgap_columns(
            "data/tmp/cp1.fasta")
        cp2labels = cp2.write_subalignment_without_allgap_columns(
            "data/tmp/cp2.fasta")
        # Neither written sub-alignment may contain an all-gap column.
        tmp = MutableAlignment().read_filepath("data/tmp/cp1.fasta")
        assert all(
            [not tmp.is_all_gap(pos) for pos in xrange(0, tmp.get_length())])
        tmp = MutableAlignment().read_filepath("data/tmp/cp2.fasta")
        assert all(
            [not tmp.is_all_gap(pos) for pos in xrange(0, tmp.get_length())])

        cp1.fragments.write_to_path("data/tmp/cp1.frags.fas")
        cp2.fragments.write_to_path("data/tmp/cp2.frags.fas")

        # hmmalign was run beforehand (per the original note); the
        # *.extended.sto files are read as its output.
        ext1 = ExtendedAlignment(cp1.fragments)
        ext1.build_extended_alignment(
            "data/tmp/cp1.fasta", "data/tmp/cp1.extended.sto")
        ext1.relabel_original_columns(cp1labels)
        ext2 = ExtendedAlignment(cp2.fragments)
        ext2.build_extended_alignment(
            "data/tmp/cp2.fasta", "data/tmp/cp2.extended.sto")
        ext2.relabel_original_columns(cp2labels)

        extmerger = ExtendedAlignment([])
        extmerger.merge_in(ext1)
        # The return value of the second merge is treated as the count of
        # "mixed" insertion columns (presumably those shared by both inputs
        # -- confirm against ExtendedAlignment.merge_in).
        mixed = extmerger.merge_in(ext2)

        extmerger.write_to_path("data/tmp/extended.merged.fasta")

        assert extmerger.is_aligned(), "Merged alignment is not aligned"
        # Negative column labels are counted as insertion columns.
        in1 = len([x for x in ext1._col_labels if x < 0])
        in2 = len([x for x in ext2._col_labels if x < 0])
        merged_length = extmerger.get_length()
        print("Merged:%d. Insertion1:%d Insertion2:%d BaseLen:%d" % (
            merged_length, in1, in2, tlen))
        assert (in1 + in2 + tlen - mixed) == merged_length, (
            "Lengths don't match up after merging. Merged:%d. "
            "Insertion1:%d Insertion2:%d BaseLen:%d Mixed-insertion: %d" % (
                merged_length, in1, in2, tlen, mixed))
        insertion_count = len(list(extmerger.iter_insertion_columns()))
        # Fix: "Insertion2" in this message was previously filled with in1.
        assert (in1 + in2 - mixed) == insertion_count, (
            "Columns are not correctly labeled after merging. "
            "Merged insertion count:%d. Insertion1:%d Insertion2:%d "
            "Mixed-insertion: %d" % (insertion_count, in1, in2, mixed))

        # After dropping all-gap columns, the base part of the merged
        # alignment must reproduce the original alignment.
        tmp = extmerger.get_base_readonly_alignment().get_mutable_alignment()
        tmp.delete_all_gap()
        assert tmp.is_aligned(), "merged alignment should be aligned!"
        assert tmp.get_length() == tlen, "merged alignment has wrong length"
        assert all([alg[k] == s for (k, s) in tmp.items()]), \
            "merged alignment should match original alignment"

        print("======= finished testExtendedAlignment =========")