Esempio n. 1
0
    def test_quality_plot(self):
        """Attach quality data to both fragment sets and plot their profiles.

        The 3mer set is reduced with ``sample_top_neighbors`` and receives its
        quality through ``add_quality_measure(None)``; the 9mer set is given an
        explicit quality file. Attaching the 3mer quality file to the 9mer set
        is expected to raise ``ValueError``.

        Returns the figure created with ``plt.figure`` after the profiles have
        been drawn on it.
        """
        expected_seq = "KIPVPVVVNGKIVAVVVVPPENLEEALLEALKELGLIKDPEEVKAVVVSPDGRLELSF"
        expected_sse = "EEEEEEEELLEEEEEEEELLLLHHHHHHHHHHHHLLLLLLLLLLEEEEELLLEEEEEE"

        small = parse_rosetta_fragments(self.frag3).sample_top_neighbors()
        large = parse_rosetta_fragments(self.frag9)

        # No quality file given: auto-load.
        small = small.add_quality_measure(None)

        # The quality file of the 3mers must be rejected for the 9mers ...
        with pytest.raises(ValueError):
            large = large.add_quality_measure(self.frag3q)
        # ... while the matching target quality file loads fine.
        large = large.add_quality_measure(self.frag9q)

        assert small.is_comparable(large) is False

        for frags in (small, large):
            assert 'rmsd' in frags

        consensus_seq = large.select_quantile().quick_consensus_sequence()
        consensus_sse = (
            large.select_quantile().quick_consensus_secondary_structure())

        assert consensus_seq == expected_seq
        assert consensus_sse == expected_sse

        canvas = plt.figure(figsize=(25, 10))
        plot_fragment_profiles(canvas, small, large, consensus_seq,
                               consensus_sse)
        plt.tight_layout()
        return canvas
Esempio n. 2
0
    def test_frequency_matrices_and_networks(self):
        """Cross-check the sequence matrices against the frequency networks.

        For the 0.1 quantile of the 9mers, the frequency matrix entry of a
        residue at a position is asserted to equal one minus the ``weight`` of
        the per-position network edge that leads into that residue.
        """
        quantile = 0.1
        frags3 = parse_rosetta_fragments(self.frag3)
        frags9 = parse_rosetta_fragments(self.frag9)
        # No quality file given: auto-load.
        frags3 = frags3.add_quality_measure(None)
        # Explicit target quality file.
        frags9 = frags9.add_quality_measure(self.frag9q)

        scores = frags3.select_quantile(quantile).make_sequence_matrix()
        assert scores.min().min() == -9

        frequencies = frags9.select_quantile(quantile).make_sequence_matrix(
            frequency=True)
        per_position = frags9.select_quantile(
            quantile).make_per_position_frequency_network()
        overall = frags9.select_quantile(quantile).make_frequency_network()

        assert frequencies.shape == (58, 20)
        assert per_position.number_of_edges() > overall.number_of_edges()

        weight = per_position.get_edge_data("0X", "1A")['weight']
        assert frequencies["A"].values[0] == pytest.approx(1 - weight)

        position = 6
        target = str(position + 1) + "R"
        for residue in "ARNDCQEGHILKMFPSTWYV":
            origin = str(position) + residue
            # Only residues that are nodes of the network can be checked.
            if origin not in per_position:
                continue
            weight = per_position.get_edge_data(origin, target)['weight']
            assert frequencies["R"].values[position] == pytest.approx(
                1 - weight)
Esempio n. 3
0
    def test_concat_fragments(self):
        """Concatenate fragment chunks and compare strict/non-strict output.

        Three frame ranges are cut out of the 3mer set, concatenated out of
        order and checked for size and frame numbering. The result is then
        written twice (non-strict, and strict after renumbering) and the two
        written files are asserted not to be comparable once parsed back.

        Every file created on disk is removed again when the test finishes,
        whether it passes or fails.
        """
        import os

        # load fragments
        _3mers = parse_rosetta_fragments(self.frag3)
        # make chunks
        _3mers_1 = _3mers[(_3mers['frame'] >= 9) & (_3mers['frame'] <= 12)]
        _3mers_2 = _3mers[(_3mers['frame'] >= 22) & (_3mers['frame'] <= 24)]
        _3mers_3 = _3mers[(_3mers['frame'] >= 45) & (_3mers['frame'] <= 46)]
        # mix fragments
        m = concat_fragments([_3mers_3, _3mers_1, _3mers_2])

        # checkpoints
        assert len(m) == 5400
        assert list(m.drop_duplicates('frame')['frame']) == list(range(1, 10))

        # Paths to delete at the end. The temporary files are created with
        # delete=False, so nothing removes them unless we do.
        created = []
        try:
            f1 = NamedTemporaryFile(delete=False)
            f1.close()
            created.append(f1.name)
            # The return value is handed to parse_rosetta_fragments below, so
            # it is treated as the path of the written file.
            nonstrict = write_rosetta_fragments(m, 3, 300, f1.name, False)
            created.append(nonstrict)

            f2 = NamedTemporaryFile(delete=False)
            f2.close()
            created.append(f2.name)
            isstrict = write_rosetta_fragments(m.renumber(10).top_limit(30),
                                               prefix=f2.name,
                                               strict=True)
            created.append(isstrict)

            assert not parse_rosetta_fragments(nonstrict).is_comparable(
                parse_rosetta_fragments(isstrict))
        finally:
            for path in created:
                # Guard against a path listed twice or never written.
                if os.path.isfile(path):
                    os.remove(path)
def main(options):
    """Plot the quality of a small and a large fragment set on two panels.

    ``options`` is read for: ``fsmall``/``flarge`` (fragment files),
    ``qsmall``/``qlarge`` and ``pdb`` (passed to ``add_quality_measure``),
    ``format`` (``"h"`` places the panels side by side, anything else stacks
    them), ``ofile`` (output image, skipped when ``None``) and ``silent``
    (suppresses ``plt.show()``).

    Returns the figure that holds both panels.
    """
    # Read Fragment Files
    small_f = parse_rosetta_fragments(options.fsmall)
    large_f = parse_rosetta_fragments(options.flarge)

    # Read or calculate Fragment Quality
    small_f = small_f.add_quality_measure(options.qsmall, options.pdb)
    large_f = large_f.add_quality_measure(options.qlarge, options.pdb)

    # Every layout decision depends on the same horizontal/vertical switch.
    if options.format == "h":
        figsize, grid, large_cell = (40, 10), (1, 2), (0, 1)
        titles, layout = "top", {'pad': 2}
    else:
        figsize, grid, large_cell = (20, 20), (2, 1), (1, 0)
        titles, layout = "right", {'rect': (0.037, 0, 1, 1)}

    # Plot
    fig = plt.figure(figsize=figsize)
    small_ax = plt.subplot2grid(grid, (0, 0), fig=fig)
    large_ax = plt.subplot2grid(grid, large_cell, fig=fig)

    for axis in (small_ax, large_ax):
        axis.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

    plot_fragments(small_f, large_f, small_ax=small_ax, large_ax=large_ax,
                   showfliers=False, titles=titles)

    plt.tight_layout(**layout)

    # Write to file
    if options.ofile is not None:
        plt.savefig(options.ofile)

    # Show on screen
    if not options.silent:
        plt.show()
    return fig
Esempio n. 5
0
    def test_add_fragments_append(self):
        """Append a subset of the 3mers back onto the set and plot the counts.

        The upper panel shows, per frame, the largest ``neighbor`` value; the
        lower panel shows the first unique ``neighbors`` value of each frame.

        Returns the figure holding both bar plots.
        """
        fragments = parse_rosetta_fragments(self.frag3)
        in_subset = (fragments['frame'] <= 10) & (fragments['neighbor'] <= 100)
        dfrep = fragments.add_fragments(fragments[in_subset], 10, 'append')

        fig = plt.figure(figsize=(25, 10))

        upper = plt.subplot2grid((2, 1), (0, 0))
        by_frame = dfrep.groupby('frame')
        positions = range(len(by_frame))
        highest_neighbor = [max(group['neighbor']) for _, group in by_frame]
        upper.bar(positions, highest_neighbor)

        lower = plt.subplot2grid((2, 1), (1, 0))
        declared_neighbors = [
            group['neighbors'].unique()[0] for _, group in by_frame
        ]
        lower.bar(positions, declared_neighbors)

        plt.tight_layout()
        return fig
Esempio n. 6
0
    def make_fragment_files(self,
                            dfloop: pd.DataFrame,
                            edges: Dict,
                            masfile: Path,
                            no_loop: Optional[bool] = True
                            ) -> 'Tuple[Dict, Path]':
        """Combine the fragments from the different matches.

        For every row of ``dfloop`` the 3mer and 9mer fragment files are
        parsed, cut to the ``start``/``stop`` window of the match, sampled,
        renumbered to ``edges['ini']`` and capped at ``edges['end']``. The
        per-match sets are appended into one 3mer and one 9mer set, which are
        written as fragment files, exported as CSV and plotted.

        :param dfloop: One row per match; read columns are ``loop_length``,
            ``loop``, ``3mers``, ``9mers``, ``pdb``, ``chain``, ``start``,
            ``stop`` and ``order``.
        :param edges: Mapping with at least the ``ini`` and ``end`` keys.
        :param masfile: Path whose parent directory receives the
            ``loopNN`` output folder.
        :param no_loop: Currently has no effect; both settings have always
            followed the same code path. Kept for interface compatibility.

        :return: ``(data, nfolder)``: the summary dictionary (including the
            written ``fragfiles``) and the folder the files were written to.
        """
        data = {
            'loop_length': int(dfloop.iloc[0]['loop_length']),
            'abego': list(dfloop['loop'].values),
            'edges': edges,
            'fragfiles': [],
            'match_count': 0
        }

        # Spread the 200 fragments kept in the end over all matches.
        sample = math.ceil(200 / dfloop.shape[0])

        def _match_fragments(row, column):
            """Parse one match's fragment file and trim it to the loop."""
            fragments = parse_rosetta_fragments(
                str(row[column]), source=f'{row["pdb"]}_{row["chain"]}')
            # Remember: MASTER match starts with 0!
            fragments = fragments.slice_region(row['start'] + 1,
                                               row['stop'] + 1)
            fragments = fragments.sample_top_neighbors(sample)
            return fragments.renumber(edges['ini']).top_limit(edges['end'])

        dfs3 = []
        dfs9 = []
        for _, row in dfloop.iterrows():
            dfs3.append(_match_fragments(row, '3mers'))
            dfs9.append(_match_fragments(row, '9mers'))

        # Merge Fragments
        dfs3all = dfs3[0]
        dfs9all = dfs9[0]
        for extra3, extra9 in zip(dfs3[1:], dfs9[1:]):
            dfs3all = dfs3all.add_fragments(extra3,
                                            ini=edges['ini'],
                                            how='append')
            dfs9all = dfs9all.add_fragments(extra9,
                                            ini=edges['ini'],
                                            how='append')
        dfs3all = dfs3all.sample_top_neighbors(200)
        dfs9all = dfs9all.sample_top_neighbors(200)

        # set up
        lord = int(dfloop.order.drop_duplicates().values[0])
        nfolder = masfile.parent.absolute().joinpath(f'loop{lord:02d}')
        nfolder.mkdir(parents=True, exist_ok=True)
        masfile2 = str(nfolder.joinpath(f'jump{lord:02d}'))

        self.log.debug('Writing 3mers fragfile\n')
        data['fragfiles'].append(
            write_rosetta_fragments(dfs3all, prefix=masfile2, strict=True))
        self.log.debug(f'3mers fragfile: {data["fragfiles"][-1]}\n')

        self.log.debug('Writing 9mers fragfile\n')
        data['fragfiles'].append(
            write_rosetta_fragments(dfs9all, prefix=masfile2, strict=True))
        self.log.debug(f'9mers fragfile: {data["fragfiles"][-1]}\n')

        # The CSV next to each fragment file omits these columns.
        dropped = [
            'pdb', 'frame', 'neighbors', 'neighbor', 'aa', 'sse', 'phi', 'psi',
            'omega'
        ]
        for fragments, fragfile in zip((dfs3all, dfs9all), data['fragfiles']):
            fragments.drop(columns=dropped).to_csv(fragfile + '.csv',
                                                   index=False)

        imageprefix = Path(masfile2).with_suffix('.fragprofile')
        TBPlot.plot_fragment_templates(self.log, dfs3all, dfs9all, imageprefix)

        return data, nfolder
Esempio n. 7
0
def make_fragment_files(dfloop: pd.DataFrame, edges: Dict,
                        masfile: Path) -> Dict:
    """Build the merged 3mer and 9mer fragment files for one loop.

    For every row of ``dfloop`` both fragment files are parsed, cut to the
    region spanned by ``row['match']``, sampled, renumbered to
    ``edges['ini']`` and capped at ``edges['end']``. The per-match sets are
    appended into one set per fragment size, written with ``masfile``
    (suffix removed) as prefix, exported as CSV and plotted.

    :param dfloop: One row per match; read columns are ``loop_length``,
        ``loop``, ``3mers``, ``9mers``, ``pdb``, ``chain`` and ``match``.
    :param edges: Mapping with at least the ``ini`` and ``end`` keys.
    :param masfile: Path used to derive the output prefix and image name.

    :return: Summary dictionary; ``fragfiles`` lists the 3mer file first and
        the 9mer file second.
    """
    summary = {
        'loop_length': int(dfloop.iloc[0]['loop_length']),
        'abego': list(dfloop['loop'].values),
        'edges': edges,
        'fragfiles': [],
        'match_count': 0
    }

    per_match = math.ceil(200 / dfloop.shape[0])

    def trimmed(row, column):
        """Parse the fragment file of ``column`` and trim it to the match."""
        fragments = parse_rosetta_fragments(
            str(row[column]),
            source='{}_{}'.format(row['pdb'], row['chain']))
        # Remember: MASTER match starts with 0!
        fragments = fragments.slice_region(row['match'][0][0] + 1,
                                           row['match'][1][1] + 1)
        fragments = fragments.sample_top_neighbors(per_match)
        return fragments.renumber(edges['ini']).top_limit(edges['end'])

    small_sets, large_sets = [], []
    for _, row in dfloop.iterrows():
        small_sets.append(trimmed(row, '3mers'))
        large_sets.append(trimmed(row, '9mers'))

    # Merge Fragments
    merged3, merged9 = small_sets[0], large_sets[0]
    for extra3, extra9 in zip(small_sets[1:], large_sets[1:]):
        merged3 = merged3.add_fragments(extra3, ini=edges['ini'], how='append')
        merged9 = merged9.add_fragments(extra9, ini=edges['ini'], how='append')
    merged3 = merged3.sample_top_neighbors(200)
    merged9 = merged9.sample_top_neighbors(200)

    written = summary['fragfiles']

    if TBcore.get_option('system', 'debug'):
        sys.stdout.write('Writing 3mers fragfile\n')
    prefix = str(masfile.with_suffix(''))
    written.append(write_rosetta_fragments(merged3, prefix=prefix, strict=True))
    if TBcore.get_option('system', 'debug'):
        sys.stdout.write('3mers fragfile: {}\n'.format(written[-1]))
        sys.stdout.write('Writing 9mers fragfile\n')
    written.append(write_rosetta_fragments(merged9, prefix=prefix, strict=True))
    if TBcore.get_option('system', 'debug'):
        sys.stdout.write('9mers fragfile: {}\n'.format(written[-1]))

    # The CSV next to each fragment file omits these columns.
    dropped = [
        'pdb', 'frame', 'neighbors', 'neighbor', 'aa', 'sse', 'phi', 'psi',
        'omega'
    ]
    for fragments, fragfile in zip((merged3, merged9), written):
        fragments.drop(columns=dropped).to_csv(fragfile + '.csv', index=False)

    TButil.plot_fragment_templates(merged3, merged9,
                                   masfile.with_suffix('.fragprofile'))

    return summary
Esempio n. 8
0
    TButil.checkpoint_out(checkpoint, data)

    return case


def loop_master_protocol(case: Case, folders: Path) -> Tuple[str, str]:
    """
    """
    lf = case['metadata.loop_fragments']
    if lf is None:
        raise TButil.PluginOrderError(
            'Data that should be loaded through loop_master is not found.')

    for i, loop in enumerate(lf):
        if i == 0:
            ff3 = parse_rosetta_fragments(loop['fragfiles'][0])
            ff9 = parse_rosetta_fragments(loop['fragfiles'][1])
            df3 = [
                pd.read_csv(str(loop['fragfiles'][0]) + '.csv'),
            ]
            df9 = [
                pd.read_csv(str(loop['fragfiles'][1]) + '.csv'),
            ]
        else:
            df3.append(pd.read_csv(str(loop['fragfiles'][0]) + '.csv'))
            df9.append(pd.read_csv(str(loop['fragfiles'][1]) + '.csv'))
            ff3 = ff3.add_fragments(parse_rosetta_fragments(
                loop['fragfiles'][0]),
                                    ini=int(loop['edges']['ini']),
                                    how='append')
            ff9 = ff9.add_fragments(parse_rosetta_fragments(
Esempio n. 9
0
        # Checkpoint save
        TButil.checkpoint_out(self.log, checkpoint, data)
        return case

    def loop_master_protocol(self, case: Case,
                             folders: Path) -> Tuple[str, str]:
        """
        """
        lf = case['metadata.loop_fragments']
        if lf is None:
            raise NodeMissingError(
                'Data that should be loaded through loop_master is not found.')

        for i, loop in enumerate(lf):
            if i == 0:
                ff3 = parse_rosetta_fragments(loop['fragfiles'][0])
                ff9 = parse_rosetta_fragments(loop['fragfiles'][1])
                df3 = [
                    pd.read_csv(str(loop['fragfiles'][0]) + '.csv'),
                ]
                df9 = [
                    pd.read_csv(str(loop['fragfiles'][1]) + '.csv'),
                ]
            else:
                df3.append(pd.read_csv(str(loop['fragfiles'][0]) + '.csv'))
                df9.append(pd.read_csv(str(loop['fragfiles'][1]) + '.csv'))
                ff3 = ff3.add_fragments(parse_rosetta_fragments(
                    loop['fragfiles'][0]),
                                        ini=int(loop['edges']['ini']),
                                        how='append')
                ff9 = ff9.add_fragments(parse_rosetta_fragments(