class TestAnalysis(object):
    """
    Test utilities in analysis.
    """
    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_cumulative.png')
    def test_cumulative(self):
        """Draw the outputs of ``ra.cumulative`` on a 2x2 grid.

        The returned figure is compared against ``plot_cumulative.png``
        by the ``mpl_image_compare`` marker.
        """
        np.random.seed(0)
        data = np.random.rand(1000)

        fig = plt.figure(figsize=(25, 25))
        layout = (2, 2)
        # All axes are created up-front, row by row.
        axes = [plt.subplot2grid(layout, cell, fig=fig)
                for cell in ((0, 0), (0, 1), (1, 0), (1, 1))]

        # (title, keyword arguments for ra.cumulative, plot the raw values?)
        panels = (
            ('cumulative', {}, False),
            ('non-cumulative', {'cumulative': 0}, False),
            ('reverse-cumulative', {'cumulative': -1}, False),
            ('raw data', {}, True),
        )
        for axis, (title, options, use_raw) in zip(axes, panels):
            raw, y, x = ra.cumulative(data, **options)
            axis.plot(x, raw if use_raw else y)
            axis.set_title(title)

        plt.tight_layout()
        return fig
class TestFragments(object):
    """
    Test usage of the FragmentFrame component.
    """
    def setup_method(self, method):
        """Resolve the paths of the 3-mer/9-mer fragment files and their quality files."""
        self.dirpath = os.path.join(os.path.dirname(__file__), '..', 'data')
        self.frag3 = os.path.join(self.dirpath, 'wauto.200.3mers.gz')
        self.frag3q = os.path.join(self.dirpath, 'wauto.200.3mers.qual.gz')
        self.frag9 = os.path.join(self.dirpath, 'wauto.200.9mers.gz')
        self.frag9q = os.path.join(self.dirpath, 'wauto.200.9mers.qual.gz')

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_fragment_profiles.png')
    def test_quality_plot(self):
        """Attach quality measures, check the consensus strings and plot the profiles."""
        df3 = parse_rosetta_fragments(self.frag3).sample_top_neighbors()
        df9 = parse_rosetta_fragments(self.frag9)
        # auto-load
        df3 = df3.add_quality_measure(None)
        # load target quality file
        # The 3-mer quality file is expected to be rejected for the 9-mer frame.
        with pytest.raises(ValueError):
            df9 = df9.add_quality_measure(self.frag3q)
        df9 = df9.add_quality_measure(self.frag9q)

        assert df3.is_comparable(df9) is False
        assert 'rmsd' in df3
        assert 'rmsd' in df9

        consensus_seq = df9.select_quantile().quick_consensus_sequence()
        consensus_sse = df9.select_quantile().quick_consensus_secondary_structure()

        assert consensus_seq == "KIPVPVVVNGKIVAVVVVPPENLEEALLEALKELGLIKDPEEVKAVVVSPDGRLELSF"
        assert consensus_sse == "EEEEEEEELLEEEEEEEELLLLHHHHHHHHHHHHLLLLLLLLLLEEEEELLLEEEEEE"

        fig = plt.figure(figsize=(25, 10))
        plot_fragment_profiles(fig, df3, df9, consensus_seq, consensus_sse)
        plt.tight_layout()
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='add_fragments_replace.png')
    def test_add_fragments_replace(self):
        """Plot per-frame neighbor counts after ``add_fragments`` with its default mode."""
        df = parse_rosetta_fragments(self.frag3, source='testfrags')
        xx = df[(df['frame'] <= 10) & (df['neighbor'] <= 100)]
        dfrep = df.add_fragments(xx, 10)

        fig = plt.figure(figsize=(25, 10))
        ax0 = plt.subplot2grid((2, 1), (0, 0))
        prange = range(len(dfrep.groupby('frame')))
        # Top panel: highest neighbor index found in each frame.
        ax0.bar(prange,
                [max(y['neighbor']) for _, y in dfrep.groupby('frame')])
        ax1 = plt.subplot2grid((2, 1), (1, 0))
        # Bottom panel: first unique value of the 'neighbors' column per frame.
        ax1.bar(prange,
                [y['neighbors'].unique()[0] for _, y in dfrep.groupby('frame')])
        plt.tight_layout()
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='add_fragments_append.png')
    def test_add_fragments_append(self):
        """Plot per-frame neighbor counts after ``add_fragments`` in 'append' mode."""
        df = parse_rosetta_fragments(self.frag3)
        xx = df[(df['frame'] <= 10) & (df['neighbor'] <= 100)]
        dfrep = df.add_fragments(xx, 10, 'append')

        fig = plt.figure(figsize=(25, 10))
        ax0 = plt.subplot2grid((2, 1), (0, 0))
        prange = range(len(dfrep.groupby('frame')))
        # Top panel: highest neighbor index found in each frame.
        ax0.bar(prange,
                [max(y['neighbor']) for _, y in dfrep.groupby('frame')])
        ax1 = plt.subplot2grid((2, 1), (1, 0))
        # Bottom panel: first unique value of the 'neighbors' column per frame.
        ax1.bar(prange,
                [y['neighbors'].unique()[0] for _, y in dfrep.groupby('frame')])
        plt.tight_layout()
        return fig

    def test_frequency_matrices_and_networks(self):
        """Check that the frequency matrix agrees with the per-position network weights."""
        df3 = parse_rosetta_fragments(self.frag3)
        df9 = parse_rosetta_fragments(self.frag9)
        # auto-load
        df3 = df3.add_quality_measure(None)
        # load target quality file
        df9 = df9.add_quality_measure(self.frag9q)

        matrix = df3.select_quantile(0.1).make_sequence_matrix()
        assert matrix.min().min() == -9

        matrix = df9.select_quantile(0.1).make_sequence_matrix(frequency=True)
        G = df9.select_quantile(0.1).make_per_position_frequency_network()
        Gf = df9.select_quantile(0.1).make_frequency_network()
        assert matrix.shape == (58, 20)
        assert G.number_of_edges() > Gf.number_of_edges()

        # The edge weight is compared as (1 - weight) against the matrix frequency.
        value = 1 - G.get_edge_data("0X", "1A")['weight']
        assert matrix["A"].values[0] == pytest.approx(value)

        n = 6
        target = str(n + 1) + "R"
        for aa in "ARNDCQEGHILKMFPSTWYV":
            origin = str(n) + aa
            # Only residues that exist as nodes at position n can be checked.
            if origin in G:
                value = 1 - G.get_edge_data(origin, target)['weight']
                assert matrix["R"].values[n] == pytest.approx(value)

    def test_concat_fragments(self):
        """Concatenate fragment chunks, write them in both modes and compare the results.

        Every file produced here (the temporary placeholders and the
        fragment files written from them) is removed when the test ends,
        whether it passes or fails.
        """
        # load fragments
        _3mers = parse_rosetta_fragments(self.frag3)
        # make chunks
        _3mers_1 = _3mers[(_3mers['frame'] >= 9) & (_3mers['frame'] <= 12)]
        _3mers_2 = _3mers[(_3mers['frame'] >= 22) & (_3mers['frame'] <= 24)]
        _3mers_3 = _3mers[(_3mers['frame'] >= 45) & (_3mers['frame'] <= 46)]
        # mix fragments
        m = concat_fragments([_3mers_3, _3mers_1, _3mers_2])
        # checkpoints
        assert len(m) == 5400
        assert list(m.drop_duplicates('frame')['frame']) == list(range(1, 10))

        # Paths to delete afterwards; NamedTemporaryFile(delete=False) never
        # removes its file on its own.
        created = []
        try:
            f1 = NamedTemporaryFile(delete=False)
            f1.close()
            created.append(f1.name)
            nonstrict = write_rosetta_fragments(m, 3, 300, f1.name, False)
            created.append(nonstrict)

            f2 = NamedTemporaryFile(delete=False)
            f2.close()
            created.append(f2.name)
            isstrict = write_rosetta_fragments(m.renumber(10).top_limit(30),
                                               prefix=f2.name, strict=True)
            created.append(isstrict)

            assert not parse_rosetta_fragments(nonstrict).is_comparable(
                parse_rosetta_fragments(isstrict))
        finally:
            for path in created:
                if path and os.path.isfile(path):
                    os.remove(path)
class TestPlotUtils(object):
    """
    Test utilities in plots.
    """
    def setup_method(self, method):
        """Resolve the directory holding the test data files."""
        self.dirpath = os.path.join(os.path.dirname(__file__), '..', 'data')

    @staticmethod
    def _random_plate():
        """Return a seeded 8x12 frame of normally distributed values."""
        np.random.seed(0)
        return pd.DataFrame(np.random.randn(8, 12))

    def _logo_sequences(self):
        """Load 'logo_plot_sequence.csv' as a DesignFrame with a 'sequence_A' column."""
        raw = pd.read_csv(os.path.join(self.dirpath, 'logo_plot_sequence.csv'),
                          header=None)
        return rc.DesignFrame(raw.rename(columns={0: 'sequence_A'}))

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_titles.png')
    def test_plot_titles(self):
        """Add right, top and left titles to a single axis."""
        fig = plt.figure(figsize=(10, 10))
        grid = (1, 1)
        ax00 = plt.subplot2grid(grid, (0, 0), fig=fig)
        ax00.plot([1, 2, 3], [1, 2, 3])
        ru.add_right_title(ax00, 'right title text', rotation=-90)
        ru.add_top_title(ax00, 'top title text')
        ru.add_left_title(ax00, 'left title text', rotation=90)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_96wells_blanc.png')
    def test_plot_96wells_blanc(self):
        """Plot a 96-well plate without any data."""
        fig, _ = rp.plot_96wells()
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_96wells_color.png')
    def test_plot_96wells_color(self):
        """Plot a 96-well plate passing the data as ``cdata``."""
        df = self._random_plate()
        fig, _ = rp.plot_96wells(cdata=df)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_96wells_size.png')
    def test_plot_96wells_size(self):
        """Plot a 96-well plate passing the negated data as ``sdata``."""
        df = self._random_plate()
        fig, _ = rp.plot_96wells(sdata=-df)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_96wells_bool.png')
    def test_plot_96wells_bool(self):
        """Plot a 96-well plate passing a boolean mask as ``bdata``."""
        df = self._random_plate()
        fig, _ = rp.plot_96wells(bdata=df < 0)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_96wells_all.png')
    def test_plot_96wells_all(self):
        """Plot a 96-well plate with ``cdata``, ``sdata`` and ``bdata`` together."""
        df = self._random_plate()
        fig, _ = rp.plot_96wells(cdata=df, sdata=-df, bdata=df < 0)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_spr.png')
    def test_spr(self):
        """Read 'spr_data.csv.gz' and plot it with ``rp.plot_SPR``."""
        df = ri.read_SPR(os.path.join(self.dirpath, 'spr_data.csv.gz'))
        fig = plt.figure(figsize=(10, 6.7))
        ax = plt.subplot2grid((1, 1), (0, 0))
        rp.plot_SPR(df, ax, datacolor='black', fitcolor='red')
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_cd.png')
    def test_cd(self):
        """Plot 'cd.csv' with ``rp.plot_CD``."""
        df = pd.read_csv(os.path.join(self.dirpath, 'cd.csv'))
        fig = plt.figure(figsize=(10, 6.7))
        ax = plt.subplot2grid((1, 1), (0, 0))
        rp.plot_CD(df, ax)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_cd2.png')
    def test_cd_read(self):
        """Read the 'CD' data with ``ri.read_CD`` and plot it with ``sample=5``."""
        def sampling(m, n):
            # Local helper: m evenly spaced indices over range(n).
            return [i * n // m + n // (2 * m) for i in range(m)]

        df = ri.read_CD(os.path.join(self.dirpath, 'CD'),
                        prefix='kx8', model='J-815')
        assert len(df['bin'].unique()) == 36
        assert sampling(5, 35) == [3, 10, 17, 24, 31]

        fig = plt.figure(figsize=(10, 6.7))
        ax = plt.subplot2grid((1, 1), (0, 0))
        rp.plot_CD(df, ax, sample=5)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_cd_chirascan.png')
    def test_cd_read_chirascan(self):
        """Read 'chirascan_cd.csv' and plot every sample on a 3x2 grid."""
        df = ri.read_CD(os.path.join(self.dirpath, 'chirascan_cd.csv'),
                        model='chirascan')
        fig = plt.figure(figsize=(15, 15))
        grid = (3, 2)
        for i, sample in enumerate(sorted(df.keys())):
            # Integer row/column of the i-th panel on a two-column grid.
            row, col = divmod(i, 2)
            ax = plt.subplot2grid(grid, (row, col), fig=fig)
            rp.plot_CD(df[sample], ax, sample=5)
            ru.add_top_title(ax, sample)
        plt.tight_layout()
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_mals.png')
    def test_mals(self):
        """Plot 'mals.csv' with ``rp.plot_MALS``."""
        df = pd.read_csv(os.path.join(self.dirpath, 'mals.csv'))
        fig = plt.figure(figsize=(10, 6.7))
        ax = plt.subplot2grid((1, 1), (0, 0))
        rp.plot_MALS(df, ax)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_mals2.png')
    def test_mals_read(self):
        """Read the MALS files with ``ri.read_MALS`` and plot them."""
        df = ri.read_MALS(filename=os.path.join(self.dirpath, 'mota_1kx8_d2.csv'),
                          mmfile=os.path.join(self.dirpath, 'mota_1kx8_d2_mm.csv'))
        fig = plt.figure(figsize=(10, 6.7))
        ax = plt.subplot2grid((1, 1), (0, 0))
        rp.plot_MALS(df, ax)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_tm.png')
    def test_thermal_melt(self):
        """Plot 'thermal_melt.csv' with ``rp.plot_thermal_melt``."""
        df = pd.read_csv(os.path.join(self.dirpath, 'thermal_melt.csv'))
        fig = plt.figure(figsize=(10, 6.7))
        ax = plt.subplot2grid((1, 1), (0, 0))
        rp.plot_thermal_melt(df, ax)
        return fig

    def test_multi_fastq(self):
        """Run ``ru.sequencing_enrichment`` on six input files and check the result."""
        indat = {'binder1': {'conc1': os.path.join(self.dirpath, 'cdk2_rand_001.fasq.gz'),
                             'conc2': os.path.join(self.dirpath, 'cdk2_rand_002.fasq.gz'),
                             'conc3': os.path.join(self.dirpath, 'cdk2_rand_003.fasq.gz')},
                 'binder2': {'conc1': os.path.join(self.dirpath, 'cdk2_rand_004.fasq.gz'),
                             'conc2': os.path.join(self.dirpath, 'cdk2_rand_005.fasq.gz'),
                             'conc3': os.path.join(self.dirpath, 'cdk2_rand_006.fasq.gz')}}
        enrich = {'binder1': ['conc1', 'conc3'],
                  'binder2': ['conc1', 'conc3']}
        bounds = ['GAS', 'PGT']
        matches = ['ALKKI']
        df = ru.sequencing_enrichment(indat, enrich, bounds, matches)

        assert 'binder2_conc1' in df.columns
        assert 'binder1_conc3' in df.columns
        assert 'enrichment_binder1' in df.columns
        assert df.shape == (20, 11)
        assert df['enrichment_binder2'].mean() == pytest.approx(1.13, rel=1e-3)
        assert df['enrichment_binder1'].max() == pytest.approx(5, rel=1e-3)

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_color_hydrophobicity.png')
    def test_color_scheme_hydrophobicity(self):
        """Logo plot using the 'HYDROPHOBICITY' color scheme."""
        df = self._logo_sequences()
        fig, _ = rp.logo_plot(df, "A", refseq=False, font_size=10,
                              hight_prop=2, colors='HYDROPHOBICITY')
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_color_chemistry.png')
    def test_color_scheme_chemistry(self):
        """Logo plot using the 'CHEMISTRY' color scheme."""
        df = self._logo_sequences()
        fig, _ = rp.logo_plot(df, "A", refseq=False, line_break=50,
                              font_size=10, hight_prop=2, colors="CHEMISTRY")
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_color_charge.png')
    def test_color_scheme_charge(self):
        """Logo plot using the 'CHARGE' color scheme."""
        df = self._logo_sequences()
        fig, _ = rp.logo_plot(df, "A", refseq=False, line_break=50,
                              font_size=10, hight_prop=2, colors="CHARGE")
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_color_custom.png')
    def test_color_scheme_custom(self):
        """Logo plot using a user-provided residue-to-color mapping."""
        custom = {
            'A': '#e6194b', 'C': '#3cb44b', 'D': '#ffe119', 'E': '#ffe119',
            'F': '#f58231', 'G': '#911eb4', 'H': '#46f0f0', 'I': '#f032e6',
            'K': '#d2f53c', 'L': '#d2f53c', 'M': '#008080', 'N': '#e6beff',
            'P': '#aa6e28', 'Q': '#fffac8', 'R': '#800000', 'S': '#aaffc3',
            'T': '#808000', 'V': '#ffd8b1', 'W': '#000080', 'Y': '#808080'
        }
        df = self._logo_sequences()
        fig, _ = rp.logo_plot(df, "A", refseq=False, line_break=50,
                              font_size=10, hight_prop=2, colors=custom)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_logo_sse.png')
    def test_sse_logo(self):
        """Logo plot of the ``structure_bits`` of chain A with custom E/H/L colors."""
        custom = {
            'E': '#0000FF', 'H': '#00FF00', 'L': '#FF0000'
        }
        ff = os.path.join(self.dirpath, 'input_3ssepred.minisilent.gz')
        df = ri.parse_rosetta_file(ff, {'structure': 'A'})
        fs = df.structure_bits('A')
        fig, _ = rp.logo_plot(fs, "A", refseq=False, line_break=50,
                              font_size=10, hight_prop=2, colors=custom)
        return fig

    def test_plot_labels(self):
        """Check that ``ru.edit_legend_text`` replaces labels and rejects extra ones.

        The test draws on its own figure, which is always closed, so it
        neither depends on nor alters pyplot's current-figure state.
        """
        fig, ax = plt.subplots()
        try:
            ax.plot([random.randint(0, 100) for _ in range(100)], label='text1')
            ax.plot([random.randint(0, 100) for _ in range(100)], label='text2')
            ax.legend()
            inilabs = [x.get_text() for x in ax.get_legend().texts]
            newlabs = ['text3', 'text4']
            ru.edit_legend_text(ax, newlabs, 'lines')
            endlabs = [x.get_text() for x in ax.get_legend().texts]
            # Three labels for two plotted lines must be refused.
            with pytest.raises(IndexError):
                ru.edit_legend_text(ax, ['text1', 'text2', 'text3'], 'lines')
        finally:
            plt.close(fig)
        assert newlabs == endlabs
        assert endlabs != inilabs

    def test_colors(self):
        """Check ``ru.color_variant`` and ``ru.add_white_to_cmap``."""
        red = [255, 0, 0]
        newred = ru.color_variant(red, brightness_offset=1)
        assert newred == '#ff0101'
        cmap = ru.add_white_to_cmap(color='blue')
        assert cmap.name == 'FromWhite'
        assert cmap.N == 256
class TestExecutables(object):
    """
    Test usage for the stand alone executables.
    """
    def setup_method(self, method):
        """Store the path of every input file as an attribute of the test instance."""
        self.dirpath = os.path.join(os.path.dirname(__file__), '..', 'data')
        inputs = (
            ('silent1', 'input_2seq.minisilent.gz'),
            ('silent2', 'input_sse.minsilent.gz'),
            ('silent3', 'input_ssebig.minisilent.gz'),
            ('silent4', 'input_3ssepred.minisilent.gz'),
            ('fastawt', 'input_2seq.wt.seq'),
            ('frag3', 'wauto.200.3mers.gz'),
            ('frag3q', 'wauto.200.3mers.qual.gz'),
            ('frag9', 'wauto.200.9mers.gz'),
            ('frag9q', 'wauto.200.9mers.qual.gz'),
        )
        for attribute, basename in inputs:
            setattr(self, attribute, os.path.join(self.dirpath, basename))

    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        """Keep the string path of the per-test temporary directory."""
        self.tmpdir = tmpdir.strpath

    def _check_mutants_figures(self):
        """Run ``check_mutants_main`` on the first silent file and return its two results."""
        target = os.path.join(self.tmpdir, 'mutants_')
        options = Namespace(ifile=self.silent1, ifiles=None, ifasta=None,
                            seqID='B', ffile=self.fastawt, ofile=target,
                            iformat='png', ifont=35)
        return check_mutants_main(options)

    def test_exe_minisilent_gz(self):
        """Run ``minisilent_main`` with an output file named 'minisilent.gz'."""
        target = os.path.join(self.tmpdir, "minisilent.gz")
        minisilent_main(Namespace(ifile=self.silent1, ifiles=None,
                                  force=False, ofile=target))

    def test_exe_minisilent(self):
        """Run ``minisilent_main`` with an output file named 'minisilent.sc'."""
        target = os.path.join(self.tmpdir, "minisilent.sc")
        minisilent_main(Namespace(ifile=self.silent1, ifiles=None,
                                  force=False, ofile=target))

    def test_exe_rename_gz(self):
        """Run ``rename_main`` with an output file named 'renamed.gz'."""
        target = os.path.join(self.tmpdir, "renamed.gz")
        rename_main(Namespace(ifile=self.silent1, prefix='test',
                              force=False, ofile=target))

    def test_exe_rename(self):
        """Run ``rename_main`` with an output file named 'renamed.sc'."""
        target = os.path.join(self.tmpdir, "renamed.sc")
        rename_main(Namespace(ifile=self.silent1, prefix='test',
                              force=False, ofile=target))

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_exe_check_mutants_logo.png')
    def test_exe_check_mutants_logo(self):
        """Compare the first figure returned by ``check_mutants_main``."""
        logo_figure, _ = self._check_mutants_figures()
        return logo_figure

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_exe_check_mutants_ali.png')
    def test_exe_check_mutants_ali(self):
        """Compare the second figure returned by ``check_mutants_main``."""
        _, alignment_figure = self._check_mutants_figures()
        return alignment_figure

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_exe_fragments.png')
    def test_exe_plot_fragments(self):
        """Compare the figure returned by ``fragment_main``."""
        settings = dict(fsmall=self.frag3, qsmall=self.frag3q,
                        flarge=self.frag9, qlarge=self.frag9q,
                        pdb=None, silent=True, format='h', ofile=None)
        return fragment_main(Namespace(**settings))

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_exe_regplot.png')
    def test_exe_regplot(self):
        """Compare the figure returned by ``regplot_main``."""
        settings = dict(ifile=self.silent3, ifiles=None,
                        x='finalRMSD', y='score', title='test plot',
                        color=0, xlab='rmsd', ylab='score',
                        ylim=[-80, -20], xlim=[0, 6], fsize=(20, 20),
                        silent=True, ofile=None)
        return regplot_main(Namespace(**settings))
class TestReadSilentFiles(object):
    """
    Test reading silent files.

    Checks: apply different description and data retrieval logic.
    """
    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        """Store the data directory and the string path of the temporary directory."""
        self.dirpath = os.path.join(os.path.dirname(__file__), '..', 'data')
        self.tmpdir = tmpdir.strpath

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='seq_freq_plot_fasta.png')
    def test_fasta(self):
        """Read FASTA files plain and expanded, write them back, and plot frequencies."""
        fasta_pattern = os.path.join(self.dirpath, "*.fa$")

        # Test simple read
        plain_id_string = "{}|PDBID|CHAIN|SEQUENCE"
        chain_names = ["2TEP:A", "2TEP:B", "2TEP:C", "2TEP:D", "3TP2:A", "3TP2:B"]
        plain_ids = [plain_id_string.format(name) for name in chain_names]
        df1 = read_fasta(fasta_pattern, multi=True)
        assert sorted(plain_ids) == sorted(list(df1['description'].values))
        first_and_last = df1['sequence_A'].values
        assert len(first_and_last[0]) == 236
        assert len(first_and_last[-1]) == 229
        assert df1.shape == (6, 2)
        assert len(df1['sequence_A'].unique()) == 2

        # Test expanded read
        expand_ids = ["2TEP", "3TP2"]
        df2 = read_fasta(fasta_pattern, expand=True, multi=True)
        assert sorted(expand_ids) == sorted(list(df2['description'].values))
        assert df2.shape == (2, 5)
        for column in ('sequence_A', 'sequence_B', 'sequence_C', 'sequence_D'):
            assert column in df2

        # Test write
        all_text = [
            ">2TEP:A|PDBID|CHAIN|SEQUENCE:A",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKD"
            "YDPADGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSV"
            "SGAVVKVTVIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS",
            ">2TEP:B|PDBID|CHAIN|SEQUENCE:A",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKDY"
            "DPADGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSVSG"
            "AVVKVTVIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS",
            ">2TEP:C|PDBID|CHAIN|SEQUENCE:A",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKDY"
            "DPADGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSVSG"
            "AVVKVTVIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS",
            ">2TEP:D|PDBID|CHAIN|SEQUENCE:A",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKDY"
            "DPADGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSVSG"
            "AVVKVTVIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS",
            ">3TP2:A|PDBID|CHAIN|SEQUENCE:A",
            "GAMTSWRDKSAKVQVKESELPSSIPAQTGLTFNIWYNKWSQGFAGNTRFVSPFALQPQLHSGKTRGDNDGQLFFCLFFA"
            "KGMCCLGPKCEYLHHIPDEEDIGKLALRTEVLDCFGREKFADYREDMGGIGSFRKKNKTLYVGGIDGALNSKHLKPAQI"
            "ESRIRFVFSRLGDIDRIRYVESKNCGFVKFKYQANAEFAKEAMSNQTLLLPSDKEWDDRREGTGLLVKWAN",
            ">3TP2:B|PDBID|CHAIN|SEQUENCE:A",
            "GAMTSWRDKSAKVQVKESELPSSIPAQTGLTFNIWYNKWSQGFAGNTRFVSPFALQPQLHSGKTRGDNDGQLFFCLFFA"
            "KGMCCLGPKCEYLHHIPDEEDIGKLALRTEVLDCFGREKFADYREDMGGIGSFRKKNKTLYVGGIDGALNSKHLKPAQI"
            "ESRIRFVFSRLGDIDRIRYVESKNCGFVKFKYQANAEFAKEAMSNQTLLLPSDKEWDDRREGTGLLVKWAN"
        ]
        expected_all = "\n".join(all_text) + "\n"
        assert write_fasta(df1, "A") == expected_all

        # Pick chains from expanded read
        picked_text = [
            ">2TEP:A",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKDYDPA"
            "DGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSVSGAVVKVT"
            "VIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS",
            ">3TP2:A",
            "GAMTSWRDKSAKVQVKESELPSSIPAQTGLTFNIWYNKWSQGFAGNTRFVSPFALQPQLHSGKTRGDNDGQLFFCLFFAKGM"
            "CCLGPKCEYLHHIPDEEDIGKLALRTEVLDCFGREKFADYREDMGGIGSFRKKNKTLYVGGIDGALNSKHLKPAQIESRIRF"
            "VFSRLGDIDRIRYVESKNCGFVKFKYQANAEFAKEAMSNQTLLLPSDKEWDDRREGTGLLVKWAN",
            ">2TEP:C",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKDYDPA"
            "DGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSVSGAVVKVT"
            "VIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS"
        ]
        expected_picked = "\n".join(picked_text) + "\n"
        assert write_fasta(df2, "AC") == expected_picked

        # Individual prints
        write_fasta(df2, "A", filename=os.path.join(self.tmpdir, "singles.fa"),
                    split=True)
        for single_name in ["singles_f0001.fa", "singles_f0002.fa"]:
            df = read_fasta(os.path.join(self.tmpdir, single_name))
            assert df.shape == (1, 2)
            newseq = df["sequence_A"].values[0]
            newid = df["description"].values[0]
            # The part of the identifier before ':' selects the row in df2.
            source_rows = df2[df2["description"] == newid.split(":")[0]]
            expected = source_rows["sequence_A"].values[0]
            assert expected == newseq

        # plot
        fig = plt.figure(figsize=(25, 10))
        ax = plt.subplot2grid((1, 1), (0, 0))
        sequence_frequency_plot(df1, 'A', ax, refseq=False,
                                key_residues='12-35', clean_unused=0)
        return fig

    def test_hmm(self):
        """Read three hmmsearch/hmmscan outputs and check row counts and sequence lengths."""
        search = read_hmmsearch(os.path.join(self.dirpath, 'search.hmm.gz'))
        assert search.shape[0] == 4932
        assert len(search['description'].unique()) == 4927
        assert search[search['full-e-value'] < 10].shape[0] == 2650

        search2 = read_hmmsearch(os.path.join(self.dirpath, 'search2.hmm.gz'))
        assert search2.shape[0] == 11
        assert len(search2.iloc[0]['sequence']) == 87

        scan = read_hmmsearch(os.path.join(self.dirpath, 'scan.hmm.gz'))
        assert scan.shape[0] == 9
        assert len(scan.iloc[0]['sequence']) == 99

    def test_pymol(self):
        """Check the selection strings ``pymol_mutant_selector`` builds for the first mutants."""
        silent_file = os.path.join(self.dirpath, 'input_2seq.minisilent.gz')
        df = parse_rosetta_file(silent_file, {'sequence': 'B'})
        # The first decoy's own sequence is used as the reference.
        df.add_reference_sequence('B', df.iloc[0].get_sequence('B'))
        df = df.identify_mutants('B').head()

        pick1 = ""
        pick2 = (
            "sele test_3lhp_binder_labeled_00002_mut, test_3lhp_binder_labeled_00002 and "
            "((c. B and (i. 1-2 or i. 7-9 or i. 11-12 or i. 14-17 or i. 19 or i. 21-23 or "
            "i. 25-27 or i. 31-33 or i. 35-39 or i. 42 or i. 45 or i. 48 or i. 52 or "
            "i. 64-68 or i. 70-75 or i. 77 or i. 79-82 or i. 84-86 or i. 88-89 or "
            "i. 91-102 or i. 104-111 or i. 113-116)))"
        )

        sel = pymol_mutant_selector(df)
        assert len(sel[0]) == 0
        assert sel[0] == pick1
        assert len(sel[1]) != 0
        assert sel[1] == pick2

    def test_master(self):
        """Parse 'master.search' with an RMSD cap of 1.4 and check the parsed matches."""
        master_file = os.path.join(self.dirpath, 'master.search')
        df = parse_master_file(master_file, max_rmsd=1.4,
                               piece_count=2, shift_0=True)
        assert df.rmsd.max() == 1.3967
        assert df.shape == (42, 5)
        assert df.iloc[-1].match == [[34, 40], [42, 48]]