def get_features_in_aln(alignment, variant, save_dir="", save_gff=True):
    """Compute variant features for the consensus sequence of an alignment.

    A consensus is built with ``dumb_consensus(threshold=0.1, ambiguous='X')``,
    wrapped in a ``Sequence`` with id ``"Consensus"`` and handed to
    ``get_variant_features``.

    Args:
        alignment: Alignment object accepted by ``SummaryInfo``.
        variant: Value stored as ``variant_id`` on the consensus ``Sequence``.
        save_dir: Forwarded unchanged to ``get_variant_features``.
        save_gff: Forwarded unchanged to ``get_variant_features``.

    Returns:
        The value returned by ``get_variant_features`` for the consensus.
    """
    # Let's extract consensus
    summary = SummaryInfo(alignment)
    cons = summary.dumb_consensus(threshold=0.1, ambiguous='X')
    # str() replaces Seq.tostring(), which newer Biopython releases no longer
    # provide; str() yields the same residue string on old and new versions.
    seq = Sequence(id="Consensus", variant_id=variant, taxonomy_id=1, sequence=str(cons))
    updated_features = get_variant_features(seq, save_dir=save_dir, save_gff=save_gff)
    return updated_features
def test_proteins(self):
    """Check consensus, PSSM rendering and information content for proteins.

    The alignment holds three 32-column protein records containing gap
    (``-``) and stop (``*``) characters. The expected PSSM text follows the
    layout produced by ``str()`` on the matrix: a header row of residue
    letters, then one row per consensus position.
    """
    a = MultipleSeqAlignment([
        SeqRecord(Seq("MHQAIFIYQIGYP*LKSGYIQSIRSPEYDNW-"), id="ID001"),
        SeqRecord(Seq("MH--IFIYQIGYAYLKSGYIQSIRSPEY-NW*"), id="ID002"),
        SeqRecord(Seq("MHQAIFIYQIGYPYLKSGYIQSIRSPEYDNW*"), id="ID003"),
    ])
    self.assertEqual(32, a.get_alignment_length())

    s = SummaryInfo(a)

    c = s.dumb_consensus(ambiguous="X")
    self.assertEqual(str(c), "MHQAIFIYQIGYXXLKSGYIQSIRSPEYDNW*")

    c = s.gap_consensus(ambiguous="X")
    self.assertEqual(str(c), "MHXXIFIYQIGYXXLKSGYIQSIRSPEYXNWX")

    # Gap and stop characters are excluded from the counts; the gap consensus
    # is used to label the rows.
    m = s.pos_specific_score_matrix(chars_to_ignore=["-", "*"], axis_seq=c)
    self.assertEqual(
        str(m),
        """    A   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   W   Y
M  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
H  0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0
X  2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
F  0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0
L  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
K  0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
R  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
P  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0
E  0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
N  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0
W  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
""",
    )

    # Uniform expected frequency over the standard protein letters.
    letters = IUPACData.protein_letters
    base_freq = 1.0 / len(letters)
    e_freq_table = {letter: base_freq for letter in letters}
    ic = s.information_content(e_freq_table=e_freq_table, chars_to_ignore=["-", "*"])
    self.assertAlmostEqual(ic, 133.061475107, places=6)
def trim_aln_to_seq_length(alignment,sequence):
    """Trim alignment to the column span covered by a key sequence.

    The alignment consensus is globally aligned (EMBOSS needle) against
    ``sequence``; the columns between the first and the last residue of
    ``sequence`` in that pairwise alignment are kept.

    Args:
        alignment: Multiple sequence alignment to trim.
        sequence: Key sequence (non-empty) whose span should be kept.

    Returns:
        ``alignment[:, begin:end]`` for the computed column borders.
    """
    import shutil
    import tempfile

    # Get consensus
    summary = SummaryInfo(alignment)
    cons = summary.dumb_consensus(threshold=0.1, ambiguous='X')

    # Scratch files go to a private temporary directory that is removed even
    # when needle or the parser fails, instead of the CWD plus a shell `rm`.
    workdir = tempfile.mkdtemp()
    try:
        cons_fasta = os.path.join(workdir, 'cons.fasta')
        key_fasta = os.path.join(workdir, 'key.fasta')
        needle_out = os.path.join(workdir, 'needle.txt')
        SeqIO.write([SeqRecord(cons, id='CONS', name='CONS')], cons_fasta, 'fasta')
        SeqIO.write([SeqRecord(sequence, id='KEY', name='KEY')], key_fasta, 'fasta')

        # Needleman-Wunsch global alignment of consensus vs. key sequence
        needle_cline = NeedleCommandline(asequence=cons_fasta, bsequence=key_fasta,
                                         gapopen=10, gapextend=0.5, outfile=needle_out)
        needle_cline()
        align = AlignIO.read(needle_out, "emboss")
    finally:
        shutil.rmtree(workdir, ignore_errors=True)

    # First row is the consensus; the borders are taken from the second (key) row.
    # NOTE(review): the borders are pairwise-alignment columns and are applied
    # directly to `alignment` - presumably needle adds no gaps to the consensus.
    seq = str(align[1, :].seq)
    begin = seq.index(str(sequence[0]))
    end = len(seq) - seq[::-1].index(str(sequence[-1]))
    # Function-call form prints the same text on Python 2 and Python 3.
    print(begin)
    print(end)
    return alignment[:, begin:end]
def trim_hist_aln_to_core(msa):
    """Trim a histone alignment to the core region of its histone type.

    The consensus of ``msa`` is classified with ``identify_hist_type`` and the
    alignment is trimmed with ``trim_aln_to_seq_length`` against the matching
    core slice of a hard-coded template sequence.
    """
    # histone type -> (full template residues, core start, core stop)
    template_specs = {
        "H3": (
            "ARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVALFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA",
            43,
            114,
        ),
        "H4": (
            "SGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG",
            23,
            93,
        ),
        "H2A": (
            "SGRGKQGGKTRAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLTAEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGRVTIAQGGVLPNIQSVLLPKKTESSKSKSK",
            15,
            119,
        ),
        "H2B": (
            "AKSAPAPKKGSKKAVTKTQKKDGKKRRKTRKESYAIYVYKVLKQVHPDTGISSKAMSIMNSFVNDVFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSAK",
            33,
            120,
        ),
    }
    templ = {}
    for hist_name, (residues, core_start, core_stop) in template_specs.items():
        templ[hist_name] = Seq(residues, IUPAC.protein)[core_start:core_stop]

    cons = SummaryInfo(msa).dumb_consensus(threshold=0.1, ambiguous="X")
    core_template = templ[identify_hist_type(cons)]
    return trim_aln_to_seq_length(msa, core_template)
def trim_aln_to_seq(alignment,sequence):
    """Trim alignment to a sequence, i.e. keep only positions that correspond to it.

    Note that the sequence should be incorporable into the alignment without
    additional gaps in the alignment.

    Args:
        alignment: Multiple sequence alignment to trim.
        sequence: Key sequence that defines the positions to keep.

    Returns:
        The result of ``trim_aln_to_key_seq`` with the key row removed.
    """
    import shutil
    import tempfile

    # Get consensus
    summary = SummaryInfo(alignment)
    cons = summary.dumb_consensus(threshold=0.1, ambiguous='X')

    # Scratch files go to a private temporary directory that is removed even
    # when needle or the parser fails, instead of the CWD plus a shell `rm`.
    workdir = tempfile.mkdtemp()
    try:
        cons_fasta = os.path.join(workdir, 'cons.fasta')
        key_fasta = os.path.join(workdir, 'key.fasta')
        needle_out = os.path.join(workdir, 'needle.txt')
        SeqIO.write([SeqRecord(cons, id='CONS', name='CONS')], cons_fasta, 'fasta')
        SeqIO.write([SeqRecord(sequence, id='KEY', name='KEY')], key_fasta, 'fasta')

        # Needleman-Wunsch global alignment of consensus vs. key sequence
        needle_cline = NeedleCommandline(asequence=cons_fasta, bsequence=key_fasta,
                                         gapopen=10, gapextend=0.5, outfile=needle_out)
        needle_cline()
        align = AlignIO.read(needle_out, "emboss")
    finally:
        shutil.rmtree(workdir, ignore_errors=True)

    # Append the MSA rows to the pairwise alignment, then drop row 0 (the
    # consensus) so that the gapped key sequence becomes the first row.
    align.extend(alignment)
    keyed = align[1:, :]
    # Drop the key row again from the trimmed result.
    return trim_aln_to_key_seq(keyed, sequence)[1:, :]
def test_nucleotides(self):
    """Check consensus, PSSM rendering and information content for DNA.

    Reads the FASTA alignment ``GFF/multi.fna``. The expected PSSM text
    follows the layout produced by ``str()`` on the matrix: a header row of
    residue letters, then one row per consensus position.
    """
    filename = "GFF/multi.fna"
    format = "fasta"
    alignment = AlignIO.read(filename, format, alphabet=unambiguous_dna)
    summary = SummaryInfo(alignment)

    c = summary.dumb_consensus(ambiguous="N")
    self.assertEqual(str(c), "NNNNNNNN")

    c = summary.gap_consensus(ambiguous="N")
    self.assertEqual(str(c), "NNNNNNNN")

    expected = {"A": 0.25, "G": 0.25, "T": 0.25, "C": 0.25}

    m = summary.pos_specific_score_matrix(chars_to_ignore=["-"], axis_seq=c)
    self.assertEqual(
        str(m),
        """    A   C   G   T
N  2.0 0.0 1.0 0.0
N  1.0 1.0 1.0 0.0
N  1.0 0.0 2.0 0.0
N  0.0 1.0 1.0 1.0
N  1.0 2.0 0.0 0.0
N  0.0 2.0 1.0 0.0
N  1.0 2.0 0.0 0.0
N  0.0 2.0 1.0 0.0
""",
    )

    # Have a generic alphabet, without a declared gap char, so must
    # provide the frequencies and chars to ignore explicitly.
    ic = summary.information_content(e_freq_table=expected, chars_to_ignore=["-"])
    self.assertAlmostEqual(ic, 7.32029999423075, places=6)
def calcAbundance(alignmentFile, consensusFile, abundanceFile, abundancePercentFile, verbose):
    """Write absolute and percentage residue-abundance matrices for an alignment.

    The FASTA alignment is summarised into a position specific score matrix.
    When the stored consensus has the alignment's length it is used as the
    matrix axis; otherwise a new consensus is computed and written over
    ``consensusFile``. The matrix text is written to ``abundanceFile``, then
    every count is converted to a percentage of the number of aligned records
    and the matrix text is written to ``abundancePercentFile``.
    """
    print('Calculating the abundance matrix...')
    alignment = AlignIO.read(alignmentFile, "fasta")
    summary = SummaryInfo(alignment)
    consensusSeq = SeqIO.read(consensusFile, 'fasta')

    consensus_fits = len(consensusSeq) == alignment.get_alignment_length()
    if not consensus_fits:
        # Stored consensus does not match the alignment: regenerate it.
        with open(consensusFile, "w") as consensus_handle:
            fresh_record = SeqRecord(summary.dumb_consensus(), id='consensus')
            SeqIO.write(fresh_record, consensus_handle, "fasta")
        abundanceMatrix = summary.pos_specific_score_matrix()
    else:
        abundanceMatrix = summary.pos_specific_score_matrix(consensusSeq)

    absolute_text = str(abundanceMatrix)
    if verbose:
        print("Abundance matrix (absolute values):")
        print(absolute_text)
    with open(abundanceFile, 'w') as out_handle:
        out_handle.write(absolute_text)

    # Convert counts to percentages of the number of aligned records, in place.
    record_count = float(len(alignment))
    for column, counts in enumerate(abundanceMatrix):
        for residue, count in counts.items():
            abundanceMatrix[column][residue] = 100.0 * float(count) / record_count

    percent_text = str(abundanceMatrix)
    if verbose:
        print("Abundance matrix (percentages):")
        print(percent_text)
    with open(abundancePercentFile, 'w') as out_handle:
        out_handle.write(percent_text)
    print('OK')
def trim_aln_to_seq_length(alignment, sequence):
    """Trim alignment to the column span covered by a key sequence.

    The alignment consensus is globally aligned (EMBOSS needle) against
    ``sequence``; the columns between the first and the last residue of
    ``sequence`` in that pairwise alignment are kept.

    Args:
        alignment: Multiple sequence alignment to trim.
        sequence: Key sequence (non-empty) whose span should be kept.

    Returns:
        ``alignment[:, begin:end]`` for the computed column borders.
    """
    import shutil
    import tempfile

    # Get consensus
    a = SummaryInfo(alignment)
    cons = a.dumb_consensus(threshold=0.1, ambiguous='X')

    # A private temporary directory replaces uuid-named files in the CWD and
    # the shell `rm`; it is removed even if needle or the parser raises.
    scratch = tempfile.mkdtemp()
    try:
        cons_path = os.path.join(scratch, 'cons.fasta')
        key_path = os.path.join(scratch, 'key.fasta')
        out_path = os.path.join(scratch, 'needle.txt')
        SeqIO.write([SeqRecord(cons, id='CONS', name='CONS')], cons_path, 'fasta')
        SeqIO.write([SeqRecord(sequence, id='KEY', name='KEY')], key_path, 'fasta')

        # Needleman-Wunsch global alignment of consensus vs. key sequence
        needle_cline = NeedleCommandline(asequence=cons_path, bsequence=key_path,
                                         gapopen=10, gapextend=0.5, outfile=out_path)
        needle_cline()
        align = AlignIO.read(out_path, "emboss")
    finally:
        shutil.rmtree(scratch, ignore_errors=True)

    # First row is the consensus; the borders are taken from the second (key) row.
    # NOTE(review): the borders are pairwise-alignment columns and are applied
    # directly to `alignment` - presumably needle adds no gaps to the consensus.
    seq = str(align[1, :].seq)
    begin = seq.index(str(sequence[0]))
    end = len(seq) - seq[::-1].index(str(sequence[-1]))
    # Function-call form prints the same text on Python 2 and Python 3.
    print(begin)
    print(end)
    return alignment[:, begin:end]
def trim_aln_to_seq(alignment, sequence):
    """Trim alignment to a sequence, i.e. keep only positions that correspond to it.

    Note that the sequence should be incorporable into the alignment without
    additional gaps in the alignment.

    Args:
        alignment: Multiple sequence alignment to trim.
        sequence: Key sequence that defines the positions to keep.

    Returns:
        The result of ``trim_aln_to_key_seq`` with the key row removed.
    """
    import shutil
    import tempfile

    # Get consensus
    a = SummaryInfo(alignment)
    cons = a.dumb_consensus(threshold=0.1, ambiguous='X')

    # A private temporary directory replaces uuid-named files in the CWD and
    # the shell `rm`; it is removed even if needle or the parser raises.
    scratch = tempfile.mkdtemp()
    try:
        cons_path = os.path.join(scratch, 'cons.fasta')
        key_path = os.path.join(scratch, 'key.fasta')
        out_path = os.path.join(scratch, 'needle.txt')
        SeqIO.write([SeqRecord(cons, id='CONS', name='CONS')], cons_path, 'fasta')
        SeqIO.write([SeqRecord(sequence, id='KEY', name='KEY')], key_path, 'fasta')

        # Needleman-Wunsch global alignment of consensus vs. key sequence
        needle_cline = NeedleCommandline(asequence=cons_path, bsequence=key_path,
                                         gapopen=10, gapextend=0.5, outfile=out_path)
        needle_cline()
        align = AlignIO.read(out_path, "emboss")
    finally:
        shutil.rmtree(scratch, ignore_errors=True)

    # Append the MSA rows to the pairwise alignment, then drop row 0 (the
    # consensus) so that the gapped key sequence becomes the first row.
    align.extend(alignment)
    with_key = align[1:, :]
    # Drop the key row again from the trimmed result.
    return trim_aln_to_key_seq(with_key, sequence)[1:, :]
def test_proteins(self):
    """Check consensus, PSSM rendering and information content for proteins.

    The alignment holds three 32-column protein records over a gapped
    protein alphabet with a stop symbol. The expected PSSM text follows the
    layout produced by ``str()`` on the matrix: a header row of residue
    letters, then one row per consensus position.
    """
    alpha = HasStopCodon(Gapped(generic_protein, "-"), "*")
    a = MultipleSeqAlignment([
        SeqRecord(Seq("MHQAIFIYQIGYP*LKSGYIQSIRSPEYDNW-", alpha), id="ID001"),
        SeqRecord(Seq("MH--IFIYQIGYAYLKSGYIQSIRSPEY-NW*", alpha), id="ID002"),
        SeqRecord(Seq("MHQAIFIYQIGYPYLKSGYIQSIRSPEYDNW*", alpha), id="ID003"),
    ])
    self.assertEqual(32, a.get_alignment_length())

    s = SummaryInfo(a)

    c = s.dumb_consensus(ambiguous="X")
    self.assertEqual(str(c), "MHQAIFIYQIGYXXLKSGYIQSIRSPEYDNW*")

    c = s.gap_consensus(ambiguous="X")
    self.assertEqual(str(c), "MHXXIFIYQIGYXXLKSGYIQSIRSPEYXNWX")

    # Gap and stop characters are excluded from the counts; the gap consensus
    # is used to label the rows.
    m = s.pos_specific_score_matrix(chars_to_ignore=['-', '*'], axis_seq=c)
    self.assertEqual(
        str(m),
        """    A   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   W   Y
M  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
H  0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0
X  2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
F  0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0
L  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
K  0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
R  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
P  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0
E  0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
N  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0
W  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
""")

    ic = s.information_content(chars_to_ignore=['-', '*'])
    self.assertAlmostEqual(ic, 133.061475107, places=6)
def test_proteins(self):
    """Check consensus, PSSM rendering and information content for proteins.

    The alignment holds three 32-column protein records over a gapped
    protein alphabet with a stop symbol. The expected PSSM text follows the
    layout produced by ``str()`` on the matrix: a header row of residue
    letters, then one row per consensus position.
    """
    alpha = HasStopCodon(Gapped(generic_protein, "-"), "*")
    a = MultipleSeqAlignment([
        SeqRecord(Seq("MHQAIFIYQIGYP*LKSGYIQSIRSPEYDNW-", alpha), id="ID001"),
        SeqRecord(Seq("MH--IFIYQIGYAYLKSGYIQSIRSPEY-NW*", alpha), id="ID002"),
        SeqRecord(Seq("MHQAIFIYQIGYPYLKSGYIQSIRSPEYDNW*", alpha), id="ID003")])
    self.assertEqual(32, a.get_alignment_length())

    s = SummaryInfo(a)

    c = s.dumb_consensus(ambiguous="X")
    self.assertEqual(str(c), "MHQAIFIYQIGYXXLKSGYIQSIRSPEYDNW*")

    c = s.gap_consensus(ambiguous="X")
    self.assertEqual(str(c), "MHXXIFIYQIGYXXLKSGYIQSIRSPEYXNWX")

    # Gap and stop characters are excluded from the counts; the gap consensus
    # is used to label the rows.
    m = s.pos_specific_score_matrix(chars_to_ignore=['-', '*'], axis_seq=c)
    self.assertEqual(str(m), """    A   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   W   Y
M  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
H  0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0
X  2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
F  0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0
L  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
K  0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
R  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
P  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0
E  0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
N  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0
W  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
""")

    ic = s.information_content(chars_to_ignore=['-', '*'])
    self.assertAlmostEqual(ic, 133.061475107, places=6)
def get_aln_and_features(request, ids=None):
    """Return sequences and their secondary-structure features as JSON.

    Sequence ids are read from the ``id`` GET parameters (at most the first
    50 are used). A single sequence is paired with the canonical template of
    its core histone type; several sequences are aligned with ``muscle`` and
    annotated through the alignment consensus.

    Args:
        request: Django request carrying the ``id`` GET parameters.
        ids: Only ``None`` leads to a lookup; see the review note below.

    Returns:
        ``"false"`` when no ids can be read from the request,
        ``(None, None)`` when no sequence matches, otherwise a
        ``JsonResponse`` with the keys ``"seqs"`` and ``"features"``.
    """
    from tools.hist_ss import templ, get_hist_ss_in_aln
    import subprocess
    import StringIO
    from Bio.Align import MultipleSeqAlignment
    from Bio.Align.AlignInfo import SummaryInfo

    if ids is None and request.method == "GET" and "id" in request.GET:
        ids = request.GET.getlist("id")
    else:
        # Returning 'false' stops Bootstrap table
        # NOTE(review): this branch is also taken whenever `ids` is passed
        # explicitly, which makes the parameter unusable - confirm intent.
        return "false"

    sequences = Sequence.objects.filter(id__in=ids[:50])
    if len(sequences) == 0:
        return None, None
    elif len(sequences) == 1:
        # Already aligned to core histone
        # Keep the model instance in its own name: `sequences` is rebound to a
        # plain list below, and a list has no `.first()`.
        only_seq = sequences.first()
        core_type = only_seq.variant.core_type
        canonical = {"name": "canonical{}".format(core_type),
                     "seq": str(templ[core_type].seq)}
        features = only_seq.features
        # to_dict() is called on the Sequence itself (as done for the consensus
        # in the multi-sequence branch), not on its `sequence` string field.
        sequences = [canonical, only_seq.to_dict()]
    else:
        try:
            hist_type = max(
                [(hist, sequences.filter(variant__core_type_id=hist).count())
                 for hist in ["H2A", "H2B", "H3", "H4", "H1"]],
                key=lambda x: x[1]
            )[0]
        except ValueError:
            hist_type = "Unknown"

        # muscle is expected next to the running Python interpreter.
        muscle = os.path.join(os.path.dirname(sys.executable), "muscle")
        process = subprocess.Popen([muscle], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        sequences = "\n".join([s.format() for s in sequences])
        aln, error = process.communicate(sequences)
        seqFile = StringIO.StringIO()
        seqFile.write(aln)
        seqFile.seek(0)
        sequences = list(SeqIO.parse(seqFile, "fasta"))  # Not in same order, but does it matter?
        msa = MultipleSeqAlignment(sequences)

        save_dir = os.path.join(os.path.sep, "tmp", "HistoneDB")
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        hv, ss = get_hist_ss_in_aln(msa, hist_type=hist_type, save_dir=save_dir, debug=False)
        a = SummaryInfo(msa)
        # str() replaces the removed Seq.tostring().
        cons = Sequence(id="consensus",
                        sequence=str(a.dumb_consensus(threshold=0.1, ambiguous='X')))
        features = Features.from_dict(cons, ss)

        sequences = [{"name": s.id, "seq": str(s.seq)} for s in sequences]
        sequences.insert(0, cons.to_dict())

    result = {"seqs": sequences, "features": features.full_gff()}
    return JsonResponse(result, safe=False)
def lymphocyte_factory( self ):
    """Populate ``self.lymphocytes`` from the consensus of ``self.friendly``.

    The consensus is stored on ``self.consensus``. For each of the
    ``self.num_lymphocytes`` entries the consensus string is passed through
    ``self.guess_gaps`` and ``self.scramble`` and wrapped in a ``Lymphocyte``.
    ``self.compute_accum_weight()`` is called once at the end.
    """
    num_lymphocytes = self.num_lymphocytes
    self.lymphocytes = []
    summary_info = SummaryInfo( self.friendly )
    consensus = summary_info.dumb_consensus()
    self.consensus = consensus
    # str() replaces the `.data` attribute, which current Seq objects no
    # longer expose; both give the plain residue string.
    consensus_text = str( consensus )
    for _ in range( num_lymphocytes ):
        lymphocyte = self.guess_gaps( consensus_text )
        lymphocyte = self.scramble( lymphocyte )
        self.lymphocytes.append( Lymphocyte( lymphocyte ) )
    self.compute_accum_weight()
def get_features_in_aln(alignment, variant, save_dir="", save_gff=True):
    """Return variant features computed on the consensus of ``alignment``.

    The consensus (``threshold=0.1``, ambiguous positions written as ``X``)
    is wrapped in a ``Sequence`` with id ``"Consensus"`` and the given
    ``variant`` as ``variant_id``, then passed to ``get_variant_features``
    together with ``save_dir`` and ``save_gff``.
    """
    consensus = SummaryInfo(alignment).dumb_consensus(threshold=0.1, ambiguous='X')
    consensus_record = Sequence(
        id="Consensus",
        variant_id=variant,
        taxonomy_id=1,
        sequence=str(consensus),
    )
    return get_variant_features(consensus_record, save_dir=save_dir, save_gff=save_gff)
def get_hist_ss_in_aln(alignment, hist_type='Unknown', save_dir="", debug=True, save_censesus=False):
    """Return sequence elements in a histone alignment.

    All numbers assume the first element in the sequence has number 0
    (unlike PDB numbering).

    Args:
        alignment: Alignment accepted by ``SummaryInfo``.
        hist_type: Histone type forwarded to ``get_hist_ss``.
        save_dir: Directory forwarded to ``get_hist_ss``.
        debug: When true, print the alignment and its consensus.
        save_censesus: When true, also return the consensus (the parameter
            name keeps its original spelling for existing callers).

    Returns:
        ``(hv, ss)`` as returned by ``get_hist_ss``, or ``(hv, ss, cons)``
        when ``save_censesus`` is true.
    """
    # Let's extract consensus
    if debug:
        # Function-call form prints the same text on Python 2 and Python 3.
        print(alignment)
    summary = SummaryInfo(alignment)
    cons = summary.dumb_consensus(threshold=0.1, ambiguous='X')
    if debug:
        print("Consensus")
        print(cons)
    # NOTE(review): the fourth positional argument is hard-coded to True and
    # does not follow `debug` - confirm against get_hist_ss's signature.
    hv, ss = get_hist_ss(cons, hist_type, save_dir, True)
    if save_censesus:
        return hv, ss, cons
    return hv, ss
def annotate_hist_msa(msa, htype, variant=None):
    """Prepend a feature annotation row from features.json to the MSA.

    The general template of ``htype`` is aligned to the MSA consensus with
    ``muscle_aln``; its ``feature1`` string is then gapped to match the MSA
    columns and returned as the first record of a new alignment, followed by
    the records of ``msa``. ``variant`` is not used.
    """
    # Read the template sequence and its feature string for this histone type.
    with open("inp_data/features.json") as handle:
        all_features = json.load(handle)
    general = all_features[htype]["General" + htype]
    genseq = general["sequence"]
    genf = general["feature1"]

    consensus = SummaryInfo(msa).dumb_consensus(threshold=0.1, ambiguous="X")
    pair = muscle_aln([
        SeqRecord(id="consensus", seq=consensus),
        SeqRecord(id="template", seq=Seq(genseq)),
    ])
    pair.sort()
    gapped_cons = str(pair[0].seq)
    gapped_template = str(pair[1].seq)

    # Keep only the template characters that sit under consensus residues.
    newgapped_template = "".join(
        templ_char
        for cons_char, templ_char in zip(gapped_cons, gapped_template)
        if cons_char != "-"
    )

    # Now gap the feature string: one feature character per template residue,
    # "-" wherever the template has a gap.
    feature_chars = []
    next_feature = 0
    for templ_char in newgapped_template:
        if templ_char == "-":
            feature_chars.append("-")
        else:
            feature_chars.append(genf[next_feature])
            next_feature += 1
    gapped_genf = "".join(feature_chars)

    feature_record = SeqRecord(id="gi|features|id", description=htype, seq=Seq(gapped_genf))
    newmsa = MultipleSeqAlignment([feature_record])
    newmsa.extend(msa)
    return newmsa
def test_nucleotides(self):
    """Check consensus, PSSM rendering and information content for DNA.

    Reads the FASTA alignment ``GFF/multi.fna``. The expected PSSM text
    follows the layout produced by ``str()`` on the matrix: a header row of
    residue letters, then one row per consensus position.
    """
    filename = "GFF/multi.fna"
    format = "fasta"
    alignment = AlignIO.read(filename, format, alphabet=unambiguous_dna)
    summary = SummaryInfo(alignment)

    c = summary.dumb_consensus(ambiguous="N")
    self.assertEqual(str(c), 'NNNNNNNN')
    self.assertNotEqual(c.alphabet, unambiguous_dna)
    self.assertTrue(isinstance(c.alphabet, DNAAlphabet))

    c = summary.gap_consensus(ambiguous="N")
    self.assertEqual(str(c), 'NNNNNNNN')
    self.assertNotEqual(c.alphabet, unambiguous_dna)
    self.assertTrue(isinstance(c.alphabet, DNAAlphabet))

    expected = FreqTable({"A": 0.25, "G": 0.25, "T": 0.25, "C": 0.25},
                         FREQ, unambiguous_dna)

    m = summary.pos_specific_score_matrix(chars_to_ignore=['-'], axis_seq=c)
    self.assertEqual(str(m), """    A   C   G   T
N  2.0 0.0 1.0 0.0
N  1.0 1.0 1.0 0.0
N  1.0 0.0 2.0 0.0
N  0.0 1.0 1.0 1.0
N  1.0 2.0 0.0 0.0
N  0.0 2.0 1.0 0.0
N  1.0 2.0 0.0 0.0
N  0.0 2.0 1.0 0.0
""")

    # Have a generic alphabet, without a declared gap char, so must
    # provide the frequencies and chars to ignore explicitly.
    ic = summary.information_content(e_freq_table=expected,
                                     chars_to_ignore=['-'])
    self.assertAlmostEqual(ic, 7.32029999423075, places=6)
def get_seed_aln_and_features(request, seed):
    """Serve the seed alignment and features of a histone type or variant.

    ``seed`` is looked up first as a ``Histone`` id and then as a ``Variant``
    id; the matching ``.fasta``, ``.gff`` and ``.pdf`` files live below
    ``settings.STATIC_ROOT_AUX/browse/seeds``.

    GET parameters read:
        download: ``"true"`` adds an attachment ``Content-Disposition`` header.
        format: ``"fasta"``, ``"gff"``, ``"pdf"``; anything else yields JSON.
        limit: integer; non-numeric values are treated as 0.
        consensus: ``"limit"`` or ``"all"``; any other supplied value is
            treated as ``"all"``. When set, a consensus record is prepended.

    Returns:
        ``HttpResponseNotFound`` when ``seed`` matches neither model,
        otherwise an ``HttpResponse`` in the requested format.
    """
    from Bio.Align import MultipleSeqAlignment
    from Bio.Align.AlignInfo import SummaryInfo
    from django.utils.html import escape

    seed_file = os.path.join(settings.STATIC_ROOT_AUX, "browse", "seeds")
    try:
        histone = Histone.objects.get(id=seed)
        seed_file = os.path.join(seed_file, "{}".format(histone.id))
    except Histone.DoesNotExist:
        try:
            variant = Variant.objects.get(id=seed)
            # The default names for canonical are with underscores, so no
            # conversion back is needed.
            seed_file = os.path.join(seed_file, variant.hist_type.id, "{}".format(variant.id))
        except Variant.DoesNotExist:
            # `seed` comes from the request; escape it before echoing it in HTML.
            return HttpResponseNotFound('<h1>No histone variant with name {}</h1>'.format(escape(seed)))

    download = request.GET.get("download", False) == "true"
    out_format = request.GET.get("format", "json")
    try:
        limit = int(request.GET.get("limit", 0))
    except ValueError:
        limit = 0

    consensus = request.GET.get("consensus", False)
    if consensus not in ["limit", "all", False]:
        consensus = "all"

    response = HttpResponse(content_type='text')
    if download:
        response['Content-Disposition'] = 'attachment; filename="{}.{}"'.format(seed, out_format)

    sequences = SeqIO.parse("{}.fasta".format(seed_file), "fasta")
    if consensus:
        sequences = [s for i, s in enumerate(sequences)
                     if consensus == "all" or (consensus == "limit" and i < limit)]
        msa = MultipleSeqAlignment(sequences)
        a = SummaryInfo(msa)
        sequences.insert(0, SeqRecord(id="Consensus", description="",
                                      seq=a.dumb_consensus(threshold=0.1, ambiguous='X')))
        # Make room for the prepended consensus record.
        limit = limit + 1 if limit > 0 else 0

    def limited_seqs():
        """Yield the records that should appear in the output."""
        for i, seq in enumerate(sequences):
            if not consensus or consensus == "limit" or (limit > 0 and i < limit):
                yield seq

    with open("{}.gff".format(seed_file)) as feature_file:
        features = feature_file.read()

    if out_format == "fasta":
        SeqIO.write(limited_seqs(), response, "fasta")
    elif out_format == "gff":
        response.write(features)
    elif out_format == "pdf":
        # PDF is binary data: read it as bytes so it is not decoded as text.
        with open("{}.pdf".format(seed_file), "rb") as pdf:
            response.write(pdf.read())
    else:
        # Default format is json
        # str() replaces the removed Seq.tostring().
        sequences = [{"name": seq.id, "seq": str(seq.seq)} for seq in limited_seqs()]
        result = {"seqs": sequences, "features": features}
        response.write(json.dumps(result))

    return response
def get_aln_and_features(request, ids=None):
    """Compute, cache and serve an alignment with its variant features.

    Two modes are visible in the code:

    * Compute mode (``id`` GET parameters present and ``ids`` is None): up to
      50 sequences are loaded, aligned with ``muscle``, annotated through
      ``get_variant_features`` and returned as a ``JsonResponse``. The result
      is also stored in the session under ``calculated_msa_seqs`` and
      ``calculated_msa_features``.
    * Download mode (``download=true``): the session-cached result is written
      to an attachment response as fasta, gff, pdf or json, selected by the
      ``format`` GET parameter.

    With ``upload=true`` the first entry of the session's
    ``uploaded_sequences`` is turned into a ``Sequence``; this also switches
    ``download`` off.

    Returns ``"false"`` when neither mode applies (or no variant can be found
    for an upload) and ``(None, None)`` when no sequence matches.
    """
    from tools.hist_ss import get_variant_features
    from tools.L_shade_hist_aln import write_alignments
    import subprocess
    import StringIO
    from Bio.Align import MultipleSeqAlignment
    from Bio.Align.AlignInfo import SummaryInfo
    from Bio.SeqRecord import SeqRecord

    # Scratch directory, made world-accessible when it is first created.
    save_dir = os.path.join(os.path.sep, "tmp", "HistoneDB")
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
        os.chmod(save_dir,0o777)

    if ids is None and request.method == "GET" and "id" in request.GET:
        ids = request.GET.getlist("id")
        sequences = Sequence.objects.filter(id__in=ids[:50])
        download = False
        upload = False
    elif request.GET.get("download", False) == "true":
        # NOTE(review): `sequences` is not bound on this path, yet the upload
        # handling below may read it - confirm download+upload cannot co-occur.
        download = True
        upload = False
    else:
        # Returning 'false' stops Bootstrap table
        return "false"

    if request.GET.get("upload", False) == "true":
        uploaded_sequence = request.session.get("uploaded_sequences", [])
        if len(uploaded_sequence) > 0:
            try:
                variant = Variant.objects.get(id=uploaded_sequence[0]["variant"])
            except:
                # Fall back to the variant of the first selected sequence.
                if len(sequences) > 0:
                    variant = sequences[0].variant
                else:
                    return "false"

            uploaded_sequence = Sequence(
                id=uploaded_sequence[0]["id"],
                variant=variant,
                sequence=uploaded_sequence[0]["sequence"],
                taxonomy=Taxonomy.objects.get(name=uploaded_sequence[0]["taxonomy"]))
            upload = True
            download = False

    if not download:
        if len(sequences) == 0:
            return None, None
        elif len(sequences) == 1:
            # Already aligned to core histone
            seq = sequences[0]
            hist_type = seq.variant.hist_type.id
            variants = [seq.variant]
            if upload:
                sequences = [uploaded_sequence, seq]
            else:
                # Let's load the corresponding canonical
                try:
                    if(("canonical" in str(seq.variant)) or ("generic" in str(seq.variant))):
                        canonical=seq
                    elif(str(seq.variant.hist_type)=="H1"):
                        canonical=Sequence.objects.filter(variant_id='generic_'+str(seq.variant.hist_type),reviewed=True,taxonomy=seq.taxonomy)[0]
                    else:
                        canonical=Sequence.objects.filter(variant_id='canonical_'+str(seq.variant.hist_type),reviewed=True,taxonomy=seq.taxonomy)[0]
                except:
                    try: # try H2A.X as a substitute for canonical
                        if(str(seq.variant.hist_type)=='H2A'):
                            canonical=Sequence.objects.filter(variant_id='H2A.X',reviewed=True,taxonomy=seq.taxonomy)[0]
                        elif(str(seq.variant.hist_type)=='H3'): # Try H3.3
                            canonical=Sequence.objects.filter(variant_id='H3.3',reviewed=True,taxonomy=seq.taxonomy)[0]
                        elif(str(seq.variant.id)=='scH1'):
                            canonical=seq
                        else:
                            # Bare raise inside the outer handler: jumps to the
                            # fallback below.
                            raise
                    except:
                        # Default: supply the sequence itself, so that only one
                        # line will be displayed.
                        canonical=seq
                        # An earlier fallback to a Xenopus template sequence
                        # (TemplateSequence "General<hist_type>") is disabled.
                sequences = [canonical, seq]
            sequence_label = seq.short_description
        else:
            seq = sequences[0]
            variants = list(Variant.objects.filter(id__in=sequences.values_list("variant", flat=True).distinct()))
            sequence_label = "Consensus"

        # muscle is looked up next to the running Python interpreter; the
        # sequences are piped in and the alignment is read back from stdout.
        muscle = os.path.join(os.path.dirname(sys.executable), "muscle")
        process = subprocess.Popen([muscle], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        sequences = "\n".join([s.format() for s in sequences])
        aln, error = process.communicate(sequences)
        seqFile = StringIO.StringIO()
        seqFile.write(aln)
        seqFile.seek(0)
        sequences = list(SeqIO.parse(seqFile, "fasta")) # Not in same order, but does it matter?
        msa = MultipleSeqAlignment(sequences)
        a = SummaryInfo(msa)
        cons = Sequence(id=sequence_label, variant_id=variants[0].id, taxonomy_id=1, sequence=a.dumb_consensus(threshold=0.1, ambiguous='X').tostring())

        save_dir = os.path.join(os.path.sep, "tmp", "HistoneDB")
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        features = get_variant_features(cons, variants=variants, save_dir=save_dir)

        # A hack to avoid two canonical seqs
        unique_sequences = [sequences[0]] if len(sequences) == 2 and sequences[0].id == sequences[1].id else sequences
        # Doing the Sequence.short_description work here; the original note
        # adds that the gffs are generated with the short description too.
        sequences = [{"name":"QUERY" if "QUERY" in s.id else Sequence.long_to_short_description(s.id), "seq":s.seq.tostring()} for s in unique_sequences]

        if sequence_label == "Consensus":
            sequences.insert(0, cons.to_dict(id=True))

        # Cache the result so that a later download request can reuse it.
        request.session["calculated_msa_seqs"] = sequences
        request.session["calculated_msa_features"] = features

        result = {"seqs":sequences, "features":features}
        return JsonResponse(result, safe=False)
    else:
        format = request.GET.get("format", "json")
        response = HttpResponse(content_type='text')
        response['Content-Disposition'] = 'attachment; filename="sequences.{}"'.format(format)

        sequences = request.session.get("calculated_msa_seqs", [])
        features = request.session.get("calculated_msa_features", "")

        if format == "fasta":
            for s in sequences:
                print >> response, ">{}\n{}".format(s["name"], s["seq"])
        elif format == "gff":
            response.write(features)
        elif format == "pdf":
            # The first cached entry is left out of the PDF alignment.
            aln = MultipleSeqAlignment([SeqRecord(Seq(s["seq"]), id=s["name"]) for s in sequences[1:]])
            result_pdf = write_alignments(
                [aln],
                save_dir = save_dir
            )

            with open(result_pdf) as pdf:
                response.write(pdf.read())

            # Cleanup
            os.remove(result_pdf)
        else:
            # Default format is json
            result = {"seqs":sequences, "features":features}
            response.write(json.dumps(result))

        return response
def _canonical_reference_for(seq):
    """Pick the sequence that a single sequence ``seq`` is aligned against.

    The candidates are tried in order and the first reviewed sequence found
    for the same taxonomy wins:

    1. ``seq`` itself when its variant name contains "canonical" or "generic";
    2. ``generic_H1`` for H1, otherwise ``canonical_<hist_type>``;
    3. ``H2A.X`` for H2A, or ``H3.3`` for H3, as substitutes for a missing
       canonical sequence.

    When nothing is found ``seq`` is returned, so the caller ends up aligning
    the sequence with itself and only one line is displayed.
    """
    variant_name = str(seq.variant)
    if "canonical" in variant_name or "generic" in variant_name:
        return seq

    hist_type = str(seq.variant.hist_type)
    prefix = "generic_" if hist_type == "H1" else "canonical_"
    candidates = [prefix + hist_type]
    if hist_type == "H2A":
        candidates.append("H2A.X")
    elif hist_type == "H3":
        candidates.append("H3.3")

    for variant_id in candidates:
        try:
            return Sequence.objects.filter(
                variant_id=variant_id, reviewed=True, taxonomy=seq.taxonomy)[0]
        except IndexError:
            # No reviewed sequence of this variant for the taxonomy; try next.
            continue
    return seq


def _muscle_align(sequences):
    """Align ``sequences`` with ``muscle`` and return the parsed records.

    Each item must provide ``format()``; the results are joined with newlines
    and piped to the ``muscle`` executable expected in the same directory as
    the running Python interpreter. The output is parsed as FASTA.
    The returned records are not necessarily in the input order.
    """
    import subprocess
    import StringIO

    muscle = os.path.join(os.path.dirname(sys.executable), "muscle")
    process = subprocess.Popen([muscle], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    fasta = "\n".join([s.format() for s in sequences])
    # stderr is not piped, so the second element of the result is always None.
    aln, _ = process.communicate(fasta)
    return list(SeqIO.parse(StringIO.StringIO(aln), "fasta"))


def _msa_download_response(request, save_dir):
    """Build the attachment response for the alignment stored in the session.

    Reads ``calculated_msa_seqs`` and ``calculated_msa_features`` from the
    session and serialises them according to the ``format`` GET parameter:
    "fasta", "gff", "pdf", or JSON for anything else (the default).
    """
    fmt = request.GET.get("format", "json")
    response = HttpResponse(content_type='text')
    response['Content-Disposition'] = 'attachment; filename="sequences.{}"'.format(fmt)

    sequences = request.session.get("calculated_msa_seqs", [])
    features = request.session.get("calculated_msa_features", "")

    if fmt == "fasta":
        for s in sequences:
            response.write(">{}\n{}\n".format(s["name"], s["seq"]))
    elif fmt == "gff":
        response.write(features)
    elif fmt == "pdf":
        from tools.L_shade_hist_aln import write_alignments
        from Bio.Align import MultipleSeqAlignment
        from Bio.SeqRecord import SeqRecord

        # NOTE(review): the first stored entry is skipped; presumably it is
        # the consensus row -- confirm for the single-sequence case.
        aln = MultipleSeqAlignment(
            [SeqRecord(Seq(s["seq"]), id=s["name"]) for s in sequences[1:]])
        result_pdf = write_alignments([aln], save_dir=save_dir)
        try:
            # PDF is binary data; read it as bytes.
            with open(result_pdf, "rb") as pdf:
                response.write(pdf.read())
        finally:
            # Cleanup, even when reading the file failed.
            os.remove(result_pdf)
    else:
        # Default format is json
        response.write(json.dumps({"seqs": sequences, "features": features}))
    return response


def get_aln_and_features(request, ids=None):
    """Align the requested sequences and annotate them, or download the result.

    Modes, selected from the request:

    * ``id`` GET parameters (or the ``ids`` argument): the sequences are
      aligned with muscle, a consensus is computed and annotated with
      ``get_variant_features``, the result is cached in the session under
      ``calculated_msa_seqs`` / ``calculated_msa_features`` and returned as a
      ``JsonResponse`` of ``{"seqs": [...], "features": ...}``.
      A single sequence is aligned against its canonical reference, or
      against the uploaded sequence when ``upload=true`` and the session
      holds ``uploaded_sequences``.
    * ``download=true``: the cached alignment is returned as an attachment in
      the format named by the ``format`` GET parameter.

    Args:
        request: the Django request; ``GET`` and ``session`` are read.
        ids: optional list of sequence ids; when ``None`` the ``id`` GET
            parameters are used.

    Returns:
        A ``JsonResponse`` or ``HttpResponse``; the string ``"false"`` when
        the request selects no mode or the uploaded sequence has no usable
        variant; ``(None, None)`` when no sequence matches.
    """
    sequences = []
    download = False
    if ids is None and request.method == "GET" and "id" in request.GET:
        ids = request.GET.getlist("id")
        sequences = Sequence.objects.filter(id__in=ids)
    elif request.GET.get("download", False) == "true":
        download = True
    elif ids is not None:
        # Ids supplied by the caller are honoured instead of being ignored.
        sequences = Sequence.objects.filter(id__in=ids)
    else:
        # Returning 'false' stops Bootstrap table
        return "false"

    save_dir = os.path.join(os.path.sep, "tmp", "HistoneDB")
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
        os.chmod(save_dir, 0o777)

    uploaded_sequence = None
    if request.GET.get("upload", False) == "true":
        uploaded = request.session.get("uploaded_sequences", [])
        if uploaded:
            entry = uploaded[0]
            try:
                variant = Variant.objects.get(id=entry["variant"])
            except (KeyError, Variant.DoesNotExist):
                # Unknown variant: borrow the one of the first selected sequence.
                if len(sequences) > 0:
                    variant = sequences[0].variant
                else:
                    return "false"
            uploaded_sequence = Sequence(
                id=entry["id"],
                variant=variant,
                sequence=entry["sequence"],
                taxonomy=Taxonomy.objects.get(name=entry["taxonomy"]))
            # An uploaded sequence always triggers a fresh alignment.
            download = False

    if download:
        return _msa_download_response(request, save_dir)

    from tools.hist_ss import get_variant_features
    from Bio.Align import MultipleSeqAlignment
    from Bio.Align.AlignInfo import SummaryInfo

    n_sequences = len(sequences)
    if n_sequences == 0:
        return None, None
    if n_sequences == 1:
        # Already aligned to core histone
        seq = sequences[0]
        variants = [seq.variant]
        if uploaded_sequence is not None:
            to_align = [uploaded_sequence, seq]
        else:
            # let's load the corresponding canonical
            to_align = [_canonical_reference_for(seq), seq]
        sequence_label = seq.short_description
    else:
        variants = list(Variant.objects.filter(
            id__in=sequences.values_list("variant", flat=True).distinct()))
        sequence_label = "Consensus"
        to_align = sequences

    # Not in same order, but does it matter?
    aligned = _muscle_align(to_align)

    msa = MultipleSeqAlignment(aligned)
    consensus = SummaryInfo(msa).dumb_consensus(threshold=0.1, ambiguous='X')
    cons = Sequence(
        id=sequence_label,
        variant_id=variants[0].id,
        taxonomy_id=1,
        sequence=str(consensus))
    features = get_variant_features(cons, variants=variants, save_dir=save_dir)

    # A hack to avoid two canonical seqs
    if len(aligned) == 2 and aligned[0].id == aligned[1].id:
        unique_sequences = [aligned[0]]
    else:
        unique_sequences = aligned

    # Names are shortened here; the gffs are generated with the short
    # description as well.
    seqs = [
        {"name": "QUERY" if "QUERY" in s.id else Sequence.long_to_short_description(s.id),
         "seq": str(s.seq)}
        for s in unique_sequences]

    if sequence_label == "Consensus":
        seqs.insert(0, cons.to_dict(id=True))

    request.session["calculated_msa_seqs"] = seqs
    request.session["calculated_msa_features"] = features

    return JsonResponse({"seqs": seqs, "features": features}, safe=False)