예제 #1
0
 def concatenate_alns(self):
     """Concatenate all alignments in ``self.aln_all`` into one alignment.

     Every alignment is first written to ``<workdir>/aln<N>.fas`` in FASTA
     format, where N counts from 1 in the iteration order of
     ``self.aln_all``. The alignments are then joined pairwise, left to
     right, with ``DnaCharacterMatrix.concatenate``. The result is written
     to ``<workdir>/concat.fas`` and stored in ``self.concatenated_aln``.

     If ``self.aln_all`` holds a single alignment, that alignment object
     itself is stored and written as the concatenated result.

     Raises:
         ValueError: if ``self.aln_all`` contains no alignments.
         AssertionError: if an alignment's taxon namespace does not compare
             equal to that of the alignments concatenated so far.
     """
     physcraper.debug("concat alns")
     concatenated = None
     for count, gene in enumerate(self.aln_all, start=1):
         aln = self.aln_all[gene]
         # Keep a per-gene copy on disk before merging.
         aln.write(path="{}/aln{}.fas".format(self.workdir, count),
                   schema="fasta")
         if concatenated is None:
             # First alignment seeds the running result.
             concatenated = aln
             continue
         assert concatenated.taxon_namespace == aln.taxon_namespace, (
             "alignment for gene {!r} does not share the taxon namespace "
             "of the previous alignments".format(gene))
         concatenated = DnaCharacterMatrix.concatenate([concatenated, aln])
     if concatenated is None:
         # Without this guard an empty aln_all would surface as an opaque
         # UnboundLocalError on the final write.
         raise ValueError("no alignments to concatenate: aln_all is empty")
     concatenated.write(path="{}/concat.fas".format(self.workdir),
                        schema="fasta")
     self.concatenated_aln = concatenated
예제 #2
0
    tmp_dict = {}
    for taxon, seq in physcraper_obj.aln.items():
        aln_dict[taxon.label] = seq
    seqlen = len(seq) #should all be same bc aligned
    for spp_name in spp_dict.keys():
        try:
            otu = random.choice(spp_dict[spp_name])
            tmp_dict[spp_name] = aln_dict[otu]
        except KeyError:
            tmp_dict[spp_name] = "-" * seqlen
    return tmp_dict

# Build the first matrix from the pruned/filled sequences of gene1.
gene1_seqs = arbitrary_prune_fill(spp_to_otu1, gene1)
aln1 = DnaCharacterMatrix.from_dict(gene1_seqs)

# The second matrix is created in the first matrix's taxon namespace.
gene2_seqs = arbitrary_prune_fill(spp_to_otu2, gene2)
aln2 = DnaCharacterMatrix.from_dict(
    gene2_seqs,
    taxon_namespace=aln1.taxon_namespace,
)

# Join both matrices and write the result as FASTA.
concat = DnaCharacterMatrix.concatenate([aln1, aln2])
concat.write(path="concat.fas", schema="fasta")





#Open the two physcraper objects
#Merge the alignments on OTT_ID?
#How to force/missing data ...


#Option 1: randomly select one seq from each ott ID.
#Option 2: Use all pairwise?
#Option 3: force monophyly of spps?  grrrrrrrrrrrrrrrr