class TestAnalysis(object):
    """
    Test utilities in analysis.
    """
    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_cumulative.png')
    def test_cumulative(self):
        """
        Draw the outputs of ``ra.cumulative`` on a 2x2 grid.

        Three panels show the second returned series for the default call,
        ``cumulative=0`` and ``cumulative=-1``; the last panel shows the first
        returned series (``raw``) of the default call.

        :return: the figure that is compared against ``plot_cumulative.png``.
        """
        # Fixed seed so that the random sample is the same on every run.
        np.random.seed(0)
        data = np.random.rand(1000)

        fig = plt.figure(figsize=(25, 25))
        shape = (2, 2)
        cells = [(0, 0), (0, 1), (1, 0), (1, 1)]
        axes = [plt.subplot2grid(shape, cell, fig=fig) for cell in cells]

        # (panel title, keyword arguments for ra.cumulative, plot ``raw``?)
        panels = [
            ('cumulative', {}, False),
            ('non-cumulative', {'cumulative': 0}, False),
            ('reverse-cumulative', {'cumulative': -1}, False),
            ('raw data', {}, True),
        ]
        for ax, (title, kwargs, show_raw) in zip(axes, panels):
            raw, y, x = ra.cumulative(data, **kwargs)
            ax.plot(x, raw if show_raw else y)
            ax.set_title(title)

        plt.tight_layout()
        return fig
# Example #2
# 0
class TestFragments(object):
    """
    Test usage of the FragmentFrame component.
    """
    def setup_method(self, method):
        """
        Store the paths of the 3-mer and 9-mer fragment files and of their
        quality files, all found in ``../data`` relative to this file.
        """
        self.dirpath = os.path.join(os.path.dirname(__file__), '..', 'data')
        self.frag3 = os.path.join(self.dirpath, 'wauto.200.3mers.gz')
        self.frag3q = os.path.join(self.dirpath, 'wauto.200.3mers.qual.gz')
        self.frag9 = os.path.join(self.dirpath, 'wauto.200.9mers.gz')
        self.frag9q = os.path.join(self.dirpath, 'wauto.200.9mers.qual.gz')

    @staticmethod
    def _neighbor_bars(dfrep):
        """
        Build the two-row bar figure shared by the ``add_fragments`` tests.

        The top row shows, per ``frame`` group, the maximum of the
        ``neighbor`` column; the bottom row shows the first unique value of
        the ``neighbors`` column.

        :param dfrep: fragment frame returned by ``add_fragments``.
        :return: the populated figure.
        """
        fig = plt.figure(figsize=(25, 10))
        # Group once and reuse the grouping for both rows.
        frames = dfrep.groupby('frame')
        prange = range(len(frames))

        ax0 = plt.subplot2grid((2, 1), (0, 0), fig=fig)
        ax0.bar(prange, [max(group['neighbor']) for _, group in frames])

        ax1 = plt.subplot2grid((2, 1), (1, 0), fig=fig)
        ax1.bar(prange,
                [group['neighbors'].unique()[0] for _, group in frames])

        plt.tight_layout()
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_fragment_profiles.png')
    def test_quality_plot(self):
        """
        Attach quality measures to both fragment sets, check the consensus
        sequence and secondary structure of the 9-mers and plot the profiles.

        :return: the figure compared against ``plot_fragment_profiles.png``.
        """
        df3 = parse_rosetta_fragments(self.frag3).sample_top_neighbors()
        df9 = parse_rosetta_fragments(self.frag9)
        # auto-load
        df3 = df3.add_quality_measure(None)
        # load target quality file; the 3-mer quality file must be rejected
        # for the 9-mer fragments
        with pytest.raises(ValueError):
            df9 = df9.add_quality_measure(self.frag3q)
        df9 = df9.add_quality_measure(self.frag9q)

        assert df3.is_comparable(df9) is False

        assert 'rmsd' in df3
        assert 'rmsd' in df9

        consensus_seq = df9.select_quantile().quick_consensus_sequence()
        consensus_sse = df9.select_quantile(
        ).quick_consensus_secondary_structure()

        assert consensus_seq == "KIPVPVVVNGKIVAVVVVPPENLEEALLEALKELGLIKDPEEVKAVVVSPDGRLELSF"
        assert consensus_sse == "EEEEEEEELLEEEEEEEELLLLHHHHHHHHHHHHLLLLLLLLLLEEEEELLLEEEEEE"

        fig = plt.figure(figsize=(25, 10))
        plot_fragment_profiles(fig, df3, df9, consensus_seq, consensus_sse)
        plt.tight_layout()
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='add_fragments_replace.png')
    def test_add_fragments_replace(self):
        """
        Call ``add_fragments`` with its default mode and plot the result.

        :return: the figure compared against ``add_fragments_replace.png``.
        """
        df = parse_rosetta_fragments(self.frag3, source='testfrags')
        xx = df[(df['frame'] <= 10) & (df['neighbor'] <= 100)]
        dfrep = df.add_fragments(xx, 10)
        return self._neighbor_bars(dfrep)

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='add_fragments_append.png')
    def test_add_fragments_append(self):
        """
        Call ``add_fragments`` in ``'append'`` mode and plot the result.

        :return: the figure compared against ``add_fragments_append.png``.
        """
        df = parse_rosetta_fragments(self.frag3)
        xx = df[(df['frame'] <= 10) & (df['neighbor'] <= 100)]
        dfrep = df.add_fragments(xx, 10, 'append')
        return self._neighbor_bars(dfrep)

    def test_frequency_matrices_and_networks(self):
        """
        Check that sequence matrices and frequency networks built from the
        same fragment selection agree with each other.
        """
        df3 = parse_rosetta_fragments(self.frag3)
        df9 = parse_rosetta_fragments(self.frag9)
        # auto-load
        df3 = df3.add_quality_measure(None)
        # load target quality file
        df9 = df9.add_quality_measure(self.frag9q)

        matrix = df3.select_quantile(0.1).make_sequence_matrix()
        assert matrix.min().min() == -9

        matrix = df9.select_quantile(0.1).make_sequence_matrix(frequency=True)
        G = df9.select_quantile(0.1).make_per_position_frequency_network()
        Gf = df9.select_quantile(0.1).make_frequency_network()

        assert matrix.shape == (58, 20)
        assert G.number_of_edges() > Gf.number_of_edges()

        # The checks below expect an edge weight to be one minus the matrix
        # value of the edge's target node.
        value = 1 - G.get_edge_data("0X", "1A")['weight']
        assert matrix["A"].values[0] == pytest.approx(value)

        n = 6
        target = str(n + 1) + "R"
        for aa in "ARNDCQEGHILKMFPSTWYV":
            origin = str(n) + aa
            if origin in G:
                value = 1 - G.get_edge_data(origin, target)['weight']
                assert matrix["R"].values[n] == pytest.approx(value)

    def test_concat_fragments(self):
        """
        Concatenate three slices of the 3-mer fragments, write them with and
        without ``strict`` and check that the two written sets differ.

        Every file created along the way is removed afterwards, whether or
        not the assertions pass.
        """
        # load fragments
        _3mers = parse_rosetta_fragments(self.frag3)
        # make chunks
        _3mers_1 = _3mers[(_3mers['frame'] >= 9) & (_3mers['frame'] <= 12)]
        _3mers_2 = _3mers[(_3mers['frame'] >= 22) & (_3mers['frame'] <= 24)]
        _3mers_3 = _3mers[(_3mers['frame'] >= 45) & (_3mers['frame'] <= 46)]
        # mix fragments
        m = concat_fragments([_3mers_3, _3mers_1, _3mers_2])

        # checkpoints
        assert len(m) == 5400
        assert list(m.drop_duplicates('frame')['frame']) == list(range(1, 10))

        # Paths to delete at the end. ``delete=False`` is needed so the name
        # can be reused as an output prefix, which makes cleanup our job.
        leftovers = []
        try:
            f1 = NamedTemporaryFile(delete=False)
            f1.close()
            leftovers.append(f1.name)
            nonstrict = write_rosetta_fragments(m, 3, 300, f1.name, False)
            # The returned value is handed to parse_rosetta_fragments below,
            # so it is treated here as the path of the written file.
            leftovers.append(nonstrict)

            f2 = NamedTemporaryFile(delete=False)
            f2.close()
            leftovers.append(f2.name)
            isstrict = write_rosetta_fragments(m.renumber(10).top_limit(30),
                                               prefix=f2.name,
                                               strict=True)
            leftovers.append(isstrict)

            assert not parse_rosetta_fragments(nonstrict).is_comparable(
                parse_rosetta_fragments(isstrict))
        finally:
            for path in leftovers:
                # The same path may be listed twice if the writer reused the
                # prefix verbatim; the existence check makes that harmless.
                if os.path.isfile(path):
                    os.remove(path)
# Example #3
# 0
class TestPlotUtils(object):
    """
    Test utilities in plots.
    """
    def setup_method(self, method):
        """Store the path of ``../data`` relative to this file."""
        self.dirpath = os.path.join(os.path.dirname(__file__), '..', 'data')

    @staticmethod
    def _single_axes(figsize=(10, 6.7)):
        """
        Create a figure holding one axes that fills a 1x1 grid.

        :param figsize: figure size passed to ``plt.figure``.
        :return: ``(fig, ax)``.
        """
        fig = plt.figure(figsize=figsize)
        # Bind the axes to ``fig`` explicitly instead of relying on pyplot's
        # current-figure state.
        ax = plt.subplot2grid((1, 1), (0, 0), fig=fig)
        return fig, ax

    @staticmethod
    def _random_plate():
        """
        Return an 8x12 frame of normally distributed values.

        The generator is re-seeded with 0 on each call so that the values
        are the same every time.
        """
        np.random.seed(0)
        return pd.DataFrame(np.random.randn(8, 12))

    def _logo_frame(self):
        """
        Load ``logo_plot_sequence.csv`` (no header row) as a DesignFrame
        whose single column is renamed to ``sequence_A``.
        """
        raw = pd.read_csv(os.path.join(self.dirpath, 'logo_plot_sequence.csv'),
                          header=None)
        return rc.DesignFrame(raw.rename(columns={0: 'sequence_A'}))

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_titles.png')
    def test_plot_titles(self):
        """Add right, top and left titles to a simple line plot."""
        fig, ax00 = self._single_axes(figsize=(10, 10))
        ax00.plot([1, 2, 3], [1, 2, 3])
        ru.add_right_title(ax00, 'right title text', rotation=-90)
        ru.add_top_title(ax00, 'top title text')
        ru.add_left_title(ax00, 'left title text', rotation=90)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_96wells_blanc.png')
    def test_plot_96wells_blanc(self):
        """Call ``plot_96wells`` without any data."""
        fig, _ = rp.plot_96wells()
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_96wells_color.png')
    def test_plot_96wells_color(self):
        """Call ``plot_96wells`` with ``cdata`` only."""
        df = self._random_plate()
        fig, _ = rp.plot_96wells(cdata=df)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_96wells_size.png')
    def test_plot_96wells_size(self):
        """Call ``plot_96wells`` with ``sdata`` only."""
        df = self._random_plate()
        fig, _ = rp.plot_96wells(sdata=-df)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_96wells_bool.png')
    def test_plot_96wells_bool(self):
        """Call ``plot_96wells`` with ``bdata`` only."""
        df = self._random_plate()
        fig, _ = rp.plot_96wells(bdata=df < 0)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_96wells_all.png')
    def test_plot_96wells_all(self):
        """Call ``plot_96wells`` with ``cdata``, ``sdata`` and ``bdata``."""
        df = self._random_plate()
        fig, _ = rp.plot_96wells(cdata=df, sdata=-df, bdata=df < 0)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_spr.png')
    def test_spr(self):
        """Read ``spr_data.csv.gz`` with ``read_SPR`` and plot it."""
        df = ri.read_SPR(os.path.join(self.dirpath, 'spr_data.csv.gz'))
        fig, ax = self._single_axes()
        rp.plot_SPR(df, ax, datacolor='black', fitcolor='red')
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_cd.png')
    def test_cd(self):
        """Plot ``cd.csv`` (read with pandas) through ``plot_CD``."""
        df = pd.read_csv(os.path.join(self.dirpath, 'cd.csv'))
        fig, ax = self._single_axes()
        rp.plot_CD(df, ax)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_cd2.png')
    def test_cd_read(self):
        """Read the ``CD`` data with ``read_CD`` and plot it with ``sample=5``."""
        def sampling(m, n):
            # Pick m indices spread evenly over range(n), each at the centre
            # of its slot.
            return [i * n // m + n // (2 * m) for i in range(m)]

        df = ri.read_CD(os.path.join(self.dirpath, 'CD'),
                        prefix='kx8', model='J-815')
        assert len(df['bin'].unique()) == 36
        assert sampling(5, 35) == [3, 10, 17, 24, 31]
        fig, ax = self._single_axes()
        rp.plot_CD(df, ax, sample=5)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_cd_chirascan.png')
    def test_cd_read_chirascan(self):
        """Read a chirascan CD file and plot each sample on a 3x2 grid."""
        df = ri.read_CD(os.path.join(self.dirpath, 'chirascan_cd.csv'),
                        model='chirascan')
        fig = plt.figure(figsize=(15, 15))
        grid = (3, 2)
        for i, sample in enumerate(sorted(df.keys())):
            # divmod(i, 2) is (row, column) when filling the grid row by row.
            ax = plt.subplot2grid(grid, divmod(i, 2), fig=fig)
            rp.plot_CD(df[sample], ax, sample=5)
            ru.add_top_title(ax, sample)
        plt.tight_layout()
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_mals.png')
    def test_mals(self):
        """Plot ``mals.csv`` (read with pandas) through ``plot_MALS``."""
        df = pd.read_csv(os.path.join(self.dirpath, 'mals.csv'))
        fig, ax = self._single_axes()
        rp.plot_MALS(df, ax)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_mals2.png')
    def test_mals_read(self):
        """Read MALS data and its ``mmfile`` with ``read_MALS`` and plot it."""
        df = ri.read_MALS(filename=os.path.join(self.dirpath, 'mota_1kx8_d2.csv'),
                          mmfile=os.path.join(self.dirpath, 'mota_1kx8_d2_mm.csv'))
        fig, ax = self._single_axes()
        rp.plot_MALS(df, ax)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_tm.png')
    def test_thermal_melt(self):
        """Plot ``thermal_melt.csv`` through ``plot_thermal_melt``."""
        df = pd.read_csv(os.path.join(self.dirpath, 'thermal_melt.csv'))
        fig, ax = self._single_axes()
        rp.plot_thermal_melt(df, ax)
        return fig

    def test_multi_fastq(self):
        """Check columns, shape and summary values of ``sequencing_enrichment``."""
        indat = {'binder1': {'conc1': os.path.join(self.dirpath, 'cdk2_rand_001.fasq.gz'),
                             'conc2': os.path.join(self.dirpath, 'cdk2_rand_002.fasq.gz'),
                             'conc3': os.path.join(self.dirpath, 'cdk2_rand_003.fasq.gz')},
                 'binder2': {'conc1': os.path.join(self.dirpath, 'cdk2_rand_004.fasq.gz'),
                             'conc2': os.path.join(self.dirpath, 'cdk2_rand_005.fasq.gz'),
                             'conc3': os.path.join(self.dirpath, 'cdk2_rand_006.fasq.gz')}}
        enrich = {'binder1': ['conc1', 'conc3'],
                  'binder2': ['conc1', 'conc3']}
        bounds = ['GAS', 'PGT']
        matches = ['ALKKI']
        df = ru.sequencing_enrichment(indat, enrich, bounds, matches)
        assert 'binder2_conc1' in df.columns
        assert 'binder1_conc3' in df.columns
        assert 'enrichment_binder1' in df.columns
        assert df.shape == (20, 11)
        assert df['enrichment_binder2'].mean() == pytest.approx(1.13, rel=1e-3)
        assert df['enrichment_binder1'].max() == pytest.approx(5, rel=1e-3)

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_color_hydrophobicity.png')
    def test_color_scheme_hydrophobicity(self):
        """Logo plot using the ``'HYDROPHOBICITY'`` colour scheme."""
        df = self._logo_frame()
        fig, _ = rp.logo_plot(df, "A", refseq=False, font_size=10, hight_prop=2,
                              colors='HYDROPHOBICITY')
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_color_chemistry.png')
    def test_color_scheme_chemistry(self):
        """Logo plot using the ``"CHEMISTRY"`` colour scheme."""
        df = self._logo_frame()
        fig, _ = rp.logo_plot(df, "A", refseq=False, line_break=50, font_size=10,
                              hight_prop=2, colors="CHEMISTRY")
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_color_charge.png')
    def test_color_scheme_charge(self):
        """Logo plot using the ``"CHARGE"`` colour scheme."""
        df = self._logo_frame()
        fig, _ = rp.logo_plot(df, "A", refseq=False, line_break=50, font_size=10,
                              hight_prop=2, colors="CHARGE")
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_color_custom.png')
    def test_color_scheme_custom(self):
        """Logo plot using a user-supplied letter-to-colour mapping."""
        custom = {
            'A': '#e6194b', 'C': '#3cb44b', 'D': '#ffe119', 'E': '#ffe119',
            'F': '#f58231', 'G': '#911eb4', 'H': '#46f0f0', 'I': '#f032e6',
            'K': '#d2f53c', 'L': '#d2f53c', 'M': '#008080', 'N': '#e6beff',
            'P': '#aa6e28', 'Q': '#fffac8', 'R': '#800000', 'S': '#aaffc3',
            'T': '#808000', 'V': '#ffd8b1', 'W': '#000080', 'Y': '#808080'
        }
        df = self._logo_frame()
        fig, _ = rp.logo_plot(df, "A", refseq=False, line_break=50, font_size=10,
                              hight_prop=2, colors=custom)
        return fig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_logo_sse.png')
    def test_sse_logo(self):
        """Logo plot of the ``structure_bits`` of a parsed silent file."""
        custom = {
            'E': '#0000FF', 'H': '#00FF00', 'L': '#FF0000'
        }
        ff = os.path.join(self.dirpath, 'input_3ssepred.minisilent.gz')
        df = ri.parse_rosetta_file(ff, {'structure': 'A'})
        fs = df.structure_bits('A')
        fig, _ = rp.logo_plot(fs, "A", refseq=False, line_break=50, font_size=10,
                              hight_prop=2, colors=custom)
        return fig

    def test_plot_labels(self):
        """
        Check that ``edit_legend_text`` replaces the legend labels and raises
        ``IndexError`` when given more labels than legend entries.
        """
        # Use a dedicated figure rather than pyplot's implicit current one and
        # close it even when a call below raises, so no figure is left open.
        fig, ax = plt.subplots()
        try:
            ax.plot([random.randint(0, 100) for i in range(100)], label='text1')
            ax.plot([random.randint(0, 100) for i in range(100)], label='text2')
            ax.legend()
            inilabs = [x.get_text() for x in ax.get_legend().texts]
            newlabs = ['text3', 'text4']
            ru.edit_legend_text(ax, newlabs, 'lines')
            endlabs = [x.get_text() for x in ax.get_legend().texts]
            with pytest.raises(IndexError):
                ru.edit_legend_text(ax, ['text1', 'text2', 'text3'], 'lines')
        finally:
            plt.close(fig)

        assert newlabs == endlabs
        assert endlabs != inilabs

    def test_colors(self):
        """Check ``color_variant`` and ``add_white_to_cmap``."""
        red = [255, 0, 0]
        newred = ru.color_variant(red, brightness_offset=1)
        assert newred == '#ff0101'

        cmap = ru.add_white_to_cmap(color='blue')
        assert cmap.name == 'FromWhite'
        assert cmap.N == 256
class TestExecutables(object):
    """
    Test usage for the stand alone executables.
    """
    def setup_method(self, method):
        """Store the paths of the input files found in ``../data``."""
        data = os.path.join(os.path.dirname(__file__), '..', 'data')
        self.dirpath = data
        self.silent1 = os.path.join(data, 'input_2seq.minisilent.gz')
        self.silent2 = os.path.join(data, 'input_sse.minsilent.gz')
        self.silent3 = os.path.join(data, 'input_ssebig.minisilent.gz')
        self.silent4 = os.path.join(data, 'input_3ssepred.minisilent.gz')
        self.fastawt = os.path.join(data, 'input_2seq.wt.seq')
        self.frag3 = os.path.join(data, 'wauto.200.3mers.gz')
        self.frag3q = os.path.join(data, 'wauto.200.3mers.qual.gz')
        self.frag9 = os.path.join(data, 'wauto.200.9mers.gz')
        self.frag9q = os.path.join(data, 'wauto.200.9mers.qual.gz')

    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        """Expose the per-test temporary directory as a plain path string."""
        self.tmpdir = tmpdir.strpath

    def _out(self, name):
        """Return the path of ``name`` inside the temporary directory."""
        return os.path.join(self.tmpdir, name)

    def _minisilent_options(self, outname):
        """Options for ``minisilent_main`` writing to ``outname``."""
        return Namespace(ifile=self.silent1,
                         ifiles=None,
                         force=False,
                         ofile=self._out(outname))

    def _rename_options(self, outname):
        """Options for ``rename_main`` writing to ``outname``."""
        return Namespace(ifile=self.silent1,
                         prefix='test',
                         force=False,
                         ofile=self._out(outname))

    def _mutants_options(self):
        """Options shared by both ``check_mutants_main`` tests."""
        return Namespace(ifile=self.silent1,
                         ifiles=None,
                         ifasta=None,
                         seqID='B',
                         ffile=self.fastawt,
                         ofile=self._out('mutants_'),
                         iformat='png',
                         ifont=35)

    def test_exe_minisilent_gz(self):
        """Run ``minisilent_main`` with a ``.gz`` output name."""
        minisilent_main(self._minisilent_options("minisilent.gz"))

    def test_exe_minisilent(self):
        """Run ``minisilent_main`` with a ``.sc`` output name."""
        minisilent_main(self._minisilent_options("minisilent.sc"))

    def test_exe_rename_gz(self):
        """Run ``rename_main`` with a ``.gz`` output name."""
        rename_main(self._rename_options("renamed.gz"))

    def test_exe_rename(self):
        """Run ``rename_main`` with a ``.sc`` output name."""
        rename_main(self._rename_options("renamed.sc"))

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_exe_check_mutants_logo.png')
    def test_exe_check_mutants_logo(self):
        """Compare the first figure returned by ``check_mutants_main``."""
        figures = check_mutants_main(self._mutants_options())
        lfig, afig = figures
        return lfig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_exe_check_mutants_ali.png')
    def test_exe_check_mutants_ali(self):
        """Compare the second figure returned by ``check_mutants_main``."""
        figures = check_mutants_main(self._mutants_options())
        lfig, afig = figures
        return afig

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_exe_fragments.png')
    def test_exe_plot_fragments(self):
        """Compare the value returned by ``fragment_main``."""
        settings = {
            'fsmall': self.frag3,
            'qsmall': self.frag3q,
            'flarge': self.frag9,
            'qlarge': self.frag9q,
            'pdb': None,
            'silent': True,
            'format': 'h',
            'ofile': None,
        }
        return fragment_main(Namespace(**settings))

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='plot_exe_regplot.png')
    def test_exe_regplot(self):
        """Compare the value returned by ``regplot_main``."""
        settings = {
            'ifile': self.silent3,
            'ifiles': None,
            'x': 'finalRMSD',
            'y': 'score',
            'title': 'test plot',
            'color': 0,
            'xlab': 'rmsd',
            'ylab': 'score',
            'ylim': [-80, -20],
            'xlim': [0, 6],
            'fsize': (20, 20),
            'silent': True,
            'ofile': None,
        }
        return regplot_main(Namespace(**settings))
class TestReadSilentFiles(object):
    """
    Test reading silent files.
    Checks: apply different description and data retrieval logic.
    """
    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        """Store the data directory and the per-test temporary directory."""
        self.dirpath = os.path.join(os.path.dirname(__file__), '..', 'data')
        self.tmpdir = tmpdir.strpath

    @pytest.mark.mpl_image_compare(baseline_dir=baseline_test_dir(),
                                   filename='seq_freq_plot_fasta.png')
    def test_fasta(self):
        """
        Read FASTA files plainly and expanded, write them back in several
        ways and plot the sequence frequencies of the plain read.

        :return: the figure compared against ``seq_freq_plot_fasta.png``.
        """
        fasta_pattern = os.path.join(self.dirpath, "*.fa$")

        # Test simple read
        plain_id_string = "{}|PDBID|CHAIN|SEQUENCE"
        chain_names = [
            "2TEP:A", "2TEP:B", "2TEP:C", "2TEP:D", "3TP2:A", "3TP2:B"
        ]
        plain_ids = [plain_id_string.format(name) for name in chain_names]
        df1 = read_fasta(fasta_pattern, multi=True)

        read_ids = list(df1['description'].values)
        assert sorted(plain_ids) == sorted(read_ids)
        sequences = df1['sequence_A'].values
        assert len(sequences[0]) == 236
        assert len(sequences[-1]) == 229
        assert df1.shape == (6, 2)
        assert len(df1['sequence_A'].unique()) == 2

        # Test expanded read
        expand_ids = ["2TEP", "3TP2"]
        df2 = read_fasta(fasta_pattern, expand=True, multi=True)
        read_ids = list(df2['description'].values)
        assert sorted(expand_ids) == sorted(read_ids)
        assert df2.shape == (2, 5)
        for column in ('sequence_A', 'sequence_B', 'sequence_C', 'sequence_D'):
            assert column in df2

        # Test write
        all_text = [
            ">2TEP:A|PDBID|CHAIN|SEQUENCE:A",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKD"
            "YDPADGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSV"
            "SGAVVKVTVIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS",
            ">2TEP:B|PDBID|CHAIN|SEQUENCE:A",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKDY"
            "DPADGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSVSG"
            "AVVKVTVIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS",
            ">2TEP:C|PDBID|CHAIN|SEQUENCE:A",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKDY"
            "DPADGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSVSG"
            "AVVKVTVIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS",
            ">2TEP:D|PDBID|CHAIN|SEQUENCE:A",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKDY"
            "DPADGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSVSG"
            "AVVKVTVIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS",
            ">3TP2:A|PDBID|CHAIN|SEQUENCE:A",
            "GAMTSWRDKSAKVQVKESELPSSIPAQTGLTFNIWYNKWSQGFAGNTRFVSPFALQPQLHSGKTRGDNDGQLFFCLFFA"
            "KGMCCLGPKCEYLHHIPDEEDIGKLALRTEVLDCFGREKFADYREDMGGIGSFRKKNKTLYVGGIDGALNSKHLKPAQI"
            "ESRIRFVFSRLGDIDRIRYVESKNCGFVKFKYQANAEFAKEAMSNQTLLLPSDKEWDDRREGTGLLVKWAN",
            ">3TP2:B|PDBID|CHAIN|SEQUENCE:A",
            "GAMTSWRDKSAKVQVKESELPSSIPAQTGLTFNIWYNKWSQGFAGNTRFVSPFALQPQLHSGKTRGDNDGQLFFCLFFA"
            "KGMCCLGPKCEYLHHIPDEEDIGKLALRTEVLDCFGREKFADYREDMGGIGSFRKKNKTLYVGGIDGALNSKHLKPAQI"
            "ESRIRFVFSRLGDIDRIRYVESKNCGFVKFKYQANAEFAKEAMSNQTLLLPSDKEWDDRREGTGLLVKWAN"
        ]
        expected_all = "\n".join(all_text) + "\n"
        assert write_fasta(df1, "A") == expected_all

        # Pick chains from expanded read
        picked_text = [
            ">2TEP:A",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKDYDPA"
            "DGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSVSGAVVKVT"
            "VIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS",
            ">3TP2:A",
            "GAMTSWRDKSAKVQVKESELPSSIPAQTGLTFNIWYNKWSQGFAGNTRFVSPFALQPQLHSGKTRGDNDGQLFFCLFFAKGM"
            "CCLGPKCEYLHHIPDEEDIGKLALRTEVLDCFGREKFADYREDMGGIGSFRKKNKTLYVGGIDGALNSKHLKPAQIESRIRF"
            "VFSRLGDIDRIRYVESKNCGFVKFKYQANAEFAKEAMSNQTLLLPSDKEWDDRREGTGLLVKWAN",
            ">2TEP:C",
            "AETVSFNFNSFSEGNPAINFQGDVTVLSNGNIQLTNLNKVNSVGRVLYAMPVRIWSSATGNVASFLTSFSFEMKDIKDYDPA"
            "DGIIFFIAPEDTQIPAGSIGGGTLGVSDTKGAGHFVGVEFDTYSNSEYNDPPTDHVGIDVNSVDSVKTVPWNSVSGAVVKVT"
            "VIYDSSTKTLSVAVTNDNGDITTIAQVVDLKAKLPERVKFGFSASGSLGGRQIHLIRSWSFTSTLITTTRRS"
        ]
        expected_picked = "\n".join(picked_text) + "\n"
        assert write_fasta(df2, "AC") == expected_picked

        # Individual prints
        singles_path = os.path.join(self.tmpdir, "singles.fa")
        write_fasta(df2, "A", filename=singles_path, split=True)

        for single_name in ["singles_f0001.fa", "singles_f0002.fa"]:
            df = read_fasta(os.path.join(self.tmpdir, single_name))
            assert df.shape == (1, 2)
            newseq = df["sequence_A"].values[0]
            newid = df["description"].values[0]
            # The part of the identifier before ':' selects the row of the
            # expanded frame to compare against.
            base_id = newid.split(":")[0]
            matching = df2[df2["description"] == base_id]
            expected = matching["sequence_A"].values[0]
            assert expected == newseq

        # plot
        fig = plt.figure(figsize=(25, 10))
        ax = plt.subplot2grid((1, 1), (0, 0))
        sequence_frequency_plot(df1,
                                'A',
                                ax,
                                refseq=False,
                                key_residues='12-35',
                                clean_unused=0)
        return fig

    def test_hmm(self):
        """Check row counts and sequence lengths of three hmm outputs."""
        search = read_hmmsearch(os.path.join(self.dirpath, 'search.hmm.gz'))
        assert search.shape[0] == 4932
        assert len(search['description'].unique()) == 4927
        assert search[search['full-e-value'] < 10].shape[0] == 2650

        search2 = read_hmmsearch(os.path.join(self.dirpath, 'search2.hmm.gz'))
        assert search2.shape[0] == 11
        assert len(search2.iloc[0]['sequence']) == 87

        scan = read_hmmsearch(os.path.join(self.dirpath, 'scan.hmm.gz'))
        assert scan.shape[0] == 9
        assert len(scan.iloc[0]['sequence']) == 99

    def test_pymol(self):
        """
        Check the selection strings ``pymol_mutant_selector`` produces for
        the first rows of a design set, using the first row as reference.
        """
        silent_file = os.path.join(self.dirpath, 'input_2seq.minisilent.gz')
        df = parse_rosetta_file(silent_file, {'sequence': 'B'})
        reference = df.iloc[0].get_sequence('B')
        df.add_reference_sequence('B', reference)
        df = df.identify_mutants('B').head()

        # The first row is the reference itself, so its selection is empty.
        pick1 = ""
        pick2 = (
            "sele test_3lhp_binder_labeled_00002_mut, test_3lhp_binder_labeled_00002 and "
            "((c. B and (i. 1-2 or i. 7-9 or i. 11-12 or i. 14-17 or i. 19 or i. 21-23 or "
            "i. 25-27 or i. 31-33 or i. 35-39 or i. 42 or i. 45 or i. 48 or i. 52 or "
            "i. 64-68 or i. 70-75 or i. 77 or i. 79-82 or i. 84-86 or i. 88-89 or "
            "i. 91-102 or i. 104-111 or i. 113-116)))"
        )
        sel = pymol_mutant_selector(df)
        first, second = sel[0], sel[1]
        assert len(first) == 0
        assert first == pick1
        assert len(second) != 0
        assert second == pick2

    def test_master(self):
        """Check the maximum rmsd, shape and last match of a parsed search."""
        master_file = os.path.join(self.dirpath, 'master.search')
        df = parse_master_file(master_file,
                               max_rmsd=1.4,
                               piece_count=2,
                               shift_0=True)
        assert df.rmsd.max() == 1.3967
        assert df.shape == (42, 5)
        last_row = df.iloc[-1]
        assert last_row.match == [[34, 40], [42, 48]]