Example #1
0
 def test_jaspar(self):
     """Load a JASPAR formatted counts matrix and check its contents.

     Reads ``data/sample.jaspar`` with ``jaspar.read`` and verifies the
     returned identifier list, the complete counts array, and two
     individual ``(position, base)`` lookups.
     """
     path = "data/sample.jaspar"
     mid, pwm = jaspar.read(path)
     # use the unittest assertion, like the checks below, so this check is
     # not stripped when Python runs with -O
     self.assertEqual(mid, ["PSSMid", "HGNCsymbol"], "ID line wrong")
     # note state indices are ordered by moltype; each row of `expect`
     # holds the counts for one base across the 7 motif positions
     # NOTE(review): rows look like T, C, A, G order, which is consistent
     # with the two spot checks at the end of this test -- confirm
     expect = [
         [35, 374, 30, 121, 6, 121, 33],
         [0, 10, 0, 0, 3, 2, 44],
         [352, 3, 354, 268, 360, 222, 155],
         [2, 2, 5, 0, 10, 44, 157],
     ]
     # `expect` is written base-by-row, so transpose it before comparing
     # with pwm.array
     assert_array_equal(pwm.array, array(expect).T)
     # spot check single cells, indexed by (position, base)
     self.assertEqual(pwm[0, "A"], 352)
     self.assertEqual(pwm[3, "T"], 121)
Example #2
0
"""
Draw sequence logos
===================

Sequence logos display sequence information. They are extensively used to display transcription factor binding sites (TFBS). They can also be applied to sequence alignments more generally.
"""
#%%
# Drawing a logo for a TFBS
# #########################
#
# We use the TFBS for the TATA box binding protein.

from cogent3 import load_aligned_seqs
from cogent3.parse import jaspar

# jaspar.read returns a pair; only the second item (the matrix) is needed here
_, pwm = jaspar.read("../../data/tbp.jaspar")
# convert the matrix to a frequency array, which is what provides .logo()
freqarr = pwm.to_freq_array()
freqarr[:5]  # illustrating the contents of the MotifFreqsArray

# %%
# build the logo drawable, then display it at an explicit size
logo = freqarr.logo()
logo.show(height=250, width=500)

#%%
# Drawing a sequence logo from a multiple sequence alignment
# ##########################################################
#
# This can be done for an entire alignment, but bear in mind that it can take some time to render. Note that we include gap characters in the display.

aln = load_aligned_seqs("../../data/brca1-bats.fasta", moltype="dna")
# only the first 311 alignment columns are drawn
# NOTE(review): `l` is easily misread as `1` or `I`; consider renaming it if
# nothing later in the file depends on this name
l = aln[:311].seqlogo(height=300, width=500, wrap=60, vspace=0.05)