Esempio n. 1
0
    def test_topology(self):
        """Exercise topology registration and splitting on a ``Case``.

        Two scenarios are covered:

        * a topology added to an empty case defines the architecture; adding
          the same topology again leaves a single connectivity, and a topology
          with an extra SSE raises ``ValidationError``;
        * an explicit architecture followed by three topologies yields three
          connectivities, which ``apply_topologies`` splits into three cases
          holding one connectivity each.

        :return: the figure holding one sketch per split case.
        """
        base_topology = 'A2E.A1E.B1H.A3E.B2H.A5E.A4E'

        # Scenario 1: the architecture is derived from the first topology.
        case = Case('test_topology').add_topology(base_topology)
        assert case.shape == (5, 2)
        assert case.shape_len == ((7, 7, 7, 7, 7), (13, 13))
        assert case.architecture_str == '5E.2H'
        assert case.connectivity_count == 1
        assert case.connectivities_str == (base_topology, )

        # Re-adding the very same topology is accepted.
        case = case.add_topology(base_topology)
        # A topology carrying one more helix must be rejected.
        with pytest.raises(ValidationError) as excinfo:
            case = case.add_topology('A2E.A1E.B1H.A3E.B2H.A5E.A4E.B3H')
        assert excinfo.value.messages == [
            'Provided topology does not match existing architecture.'
        ]
        assert case.connectivity_count == 1
        assert case.connectivities_str == (base_topology, )

        split_cases = case.apply_topologies()
        assert len(split_cases) == 1
        only_case = split_cases[0]
        assert only_case.shape_len == case.shape_len
        assert only_case.shape == case.shape
        assert only_case.connectivities_str == case.connectivities_str

        # Scenario 2: explicit architecture plus three sized topologies.
        sized_topologies = (
            'A2E8.A1E8.B1H18.A3E7.B2H19.A5E7.A4E7',
            'A2E8.A1E8.B1H18.A3E7.B2H19.A4E7.A5E7',
            'A1E8.A2E8.B1H18.A3E7.B2H19.A4E7.A5E7',
        )
        expected_connectivities = (
            'A2E.A1E.B1H.A3E.B2H.A5E.A4E',
            'A2E.A1E.B1H.A3E.B2H.A4E.A5E',
            'A1E.A2E.B1H.A3E.B2H.A4E.A5E',
        )
        case = Case('test_topology').add_architecture('5E:8:8:7:7:7.2H:18:19')
        for sized in sized_topologies:
            case = case.add_topology(sized)
        assert case.shape == (5, 2)
        assert case.shape_len == ((8, 8, 7, 7, 7), (18, 19))
        assert case.architecture_str == '5E.2H'
        assert case.connectivity_count == 3
        assert case.connectivities_str == expected_connectivities

        split_cases = case.apply_topologies()
        assert len(split_cases) == 3
        for position, connectivity in enumerate(expected_connectivities):
            assert split_cases[position].connectivity_count == 1
            assert split_cases[position].connectivities_str == (connectivity, )

        # One sketch per split case, laid out side by side on a 1x3 grid.
        fig = plt.figure(figsize=(15, 5))
        for column in range(3):
            axis = plt.subplot2grid((1, 3), (0, column), fig=fig)
            plot_case_sketch(split_cases[column], axis)
        return fig
Esempio n. 2
0
    def single_execute(self, data: Dict) -> Dict:
        """Search loop fragments with MASTER for every SSE group of one Case.

        A ``Case`` is built from ``data``; it must hold exactly one
        connectivity and is passed through ``apply_topologies`` when it is not
        yet reoriented. For every entry of ``self.steps`` a ``loopgroupNN``
        working folder is created, existing per-loop checkpoints are reloaded,
        a PDB is built for the SSEs of the group and, when no ``.master``
        file exists yet, a PDS query is created and MASTER is executed. The
        matches are then turned into fragment data that is appended to
        ``kase.data['metadata']['loop_fragments']`` and
        ``kase.data['metadata']['loop_lengths']`` and written to a checkpoint.

        :param data: Data from which the ``Case`` is constructed.
        :return: The updated ``Case`` instance.
            NOTE(review): the annotation says ``Dict`` but ``kase`` (a
            ``Case``) is what is returned -- confirm against callers.
        :raises NodeDataError: If the case does not contain exactly one
            connectivity.
        """
        kase = Case(data)
        # Loop MASTER is only applied to a Case with one single connectivity
        if kase.connectivity_count != 1:
            err = f'{self.nodeID} can only be applied to one connectivity. '
            err += f'Current case contains a total of {kase.connectivity_count}.'
            raise NodeDataError(err)
        # And has to be reoriented
        if not kase.is_reoriented:
            self.log.debug(
                'Topology was provided without oriented SSE -> orienting.')
            kase = kase.apply_topologies()[0]

        # Generate the folder tree for a single connectivity.
        folders = kase.connectivities_paths[0].joinpath('loopgroup_master')
        folders.mkdir(parents=True, exist_ok=True)

        # Global step distance
        # NOTE(review): loop_step is only referenced by the commented-out
        # get_loop_length call in step 5 below; no live code reads it.
        loop_step = kase.cast_absolute(
        )['configuration.defaults.distance.loop_step']

        # Output keys
        kase.data.setdefault('metadata', {}).setdefault('loop_fragments', [])
        kase.data.setdefault('metadata', {}).setdefault('loop_lengths', [])

        # Find steps: Each pair of secondary structure.
        #it = kase.connectivities_str[0].split('.')
        #steps = [it[i:i + 2] for i in range(0, len(it) - 1)]
        lengths = kase.connectivity_len[0]
        # Running position used as 'ini' of the next fragment; advanced by
        # sse1 + loop length after every reloaded or newly computed loop.
        start = 1

        for i, (group, infos) in enumerate(self.steps.items()):
            self.log.info(f'Search at: {group}')

            # 1. Make folders and files
            wfolder = folders.joinpath(f'loopgroup{i + 1:02d}')
            wfolder.mkdir(parents=True, exist_ok=True)
            outfile = wfolder.joinpath(f'loopgroup_master.iter{i + 1:02d}.pdb')
            outfilePDS = wfolder.joinpath(
                f'loopgroup_master.iter{i + 1:02d}.pds')
            masfile = outfile.with_suffix('.master')

            # Last element of the step entry is a ';'-separated list of loop
            # numbers in which 'x' entries are ignored.
            # NOTE(review): the entry is looked up again through a key built
            # from the loop index; presumably identical to infos[-1] when the
            # keys of self.steps are 'group01', 'group02', ... in order --
            # confirm.
            gr = self.steps[f'group{i + 1:02d}'][-1].split(';')
            gr = [int(g) for g in gr if g != 'x']
            for g in gr:
                checkpoint = wfolder.joinpath(f'loop{g:02d}/checkpoint.json')
                # 2. Check if checkpoint exists, retrieve and skip
                # (presumably checkpoint_in returns None when there is no
                # checkpoint to reload -- confirm in TButil)
                reload = TButil.checkpoint_in(self.log, checkpoint)
                if reload is not None:
                    self.log.debug(
                        f'Reloading loopgroup{i + 1:02d} with loop{g:02d}')
                    kase.data['metadata']['loop_fragments'].append(reload)
                    kase.data['metadata']['loop_lengths'].append(
                        int(reload['edges']['loop']))
                    start += (int(reload['edges']['sse1']) +
                              int(reload['edges']['loop']))
                    # NOTE(review): this only advances the inner `for g in gr`
                    # loop; steps 3 and 4 below still run for this group even
                    # when every loop was reloaded -- confirm this is intended.
                    continue

            # 3. Check hairpin
            # Get SSEs and identifiers
            sses = [kase.get_sse_by_id(sse) for sse in infos[0]]
            #sse1_name, sse2_name = sse1['id'], sse2['id']
            #is_hairpin = self.check_hairpin(sse1_name, sse2_name)

            # 4. Generate structures
            sses = TBstructure.build_pdb_object(self.log,
                                                sses,
                                                5,
                                                concat=False,
                                                outfile=outfile)

            # NOTE(review): steps 6-10 are all nested under this check, so a
            # group whose .master file already exists is neither minimized
            # nor processed here -- confirm this is intended.
            if not masfile.is_file():
                # 5. calculate expected loop length by loop_step
                #Mdis, mdis = TBstructure.get_loop_length(self.log, sse1, sse2, loop_step, self.loop_range)

                # 6. Run MASTER
                #outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
                self.log.debug(f'FILE {outfilePDS}')
                # -> make PDS query
                cmd = TBMaster.createPDS(outfile, outfilePDS)
                self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                run(cmd, stdout=DEVNULL)
                # -> run MASTER
                cmd = TBMaster.master_groupedgap(outfilePDS, self.pdsdb,
                                                 masfile, infos[1],
                                                 self.rmsd_cut)
                self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                # NOTE(review): `result` is only referenced by the
                # commented-out motif branch below; the return code of the
                # MASTER call is not checked by live code.
                result = run(cmd, stdout=DEVNULL)

                # TODO: implement motif compatibility
                # if result.returncode: # no loop between that connection, e.g. a motif ranging over multiple sse with keeping the loops
                #     # 4. Generate structures
                #     self.log.debug('generate combined structure')
                #     sse = pd.concat([sse1, sse2], sort=False)
                #
                #     # 6. Run MASTER
                #     self.log.debug(Path(outfile))
                #     #outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
                #     self.log.debug(f'FILE {outfilePDS}')
                #     # -> make PDS query
                #     cmd = TBMaster.createPDS(outfile, outfilePDS)
                #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                #     run(cmd, stdout=DEVNULL)
                #     # -> run MASTER
                #     cmd = TBMaster.master_nogap(outfilePDS, self.pdsdb, masfile, self.rmsd_cut)
                #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                #     run(cmd, stdout=DEVNULL)
                #
                #     # 6. Minimize master data (pick top_loopsx3 lines to read and minimize the files)
                #     match_count = self.minimize_master_file(masfile)
                #     self.log.debug(f'match count here {match_count}')
                #
                #     # 7. Retrieve MASTER data
                #     dfloop = self.process_master_data_no_gap(masfile, sse1_name, sse2_name)
                #     sse1l, loopl, sse2l = lengths[i], int(dfloop['loop_length'].values[0]), lengths[i + 1]
                #     total_len = sse1l + loopl + sse2l
                #     end_edge = total_len + start - 1
                #     edges = {'ini': int(start), 'end': int(end_edge), 'sse1': int(sse1l), 'loop': int(loopl), 'sse2': int(sse2l)}
                #     self.log.debug(f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}')
                #     self.log.debug(dfloop.to_string())
                #
                #     # 8. Bring and Combine fragments from the different sources.
                #     loop_data = self.make_fragment_files(dfloop, edges, masfile)
                #     loop_data['match_count'] += match_count

                #else:

                # 6. Minimize master data (pick top_loopsx3 lines to read and minimize the files)
                match_count = self.minimize_master_file(masfile)
                # 7. Retrieve MASTER data
                df_container = self.process_master_data(
                    masfile, infos[0], infos[1], infos[2])

                # One pass per distinct value of the `order` column; each pass
                # handles the rows sharing that value as one loop.
                for indx in list(df_container.order.drop_duplicates()):
                    dfloop = df_container[df_container.order == indx]
                    # NOTE(review): the SSE lengths are indexed with the group
                    # index `i`, not with `indx`, so every loop of the same
                    # group uses the same sse1/sse2 lengths -- confirm.
                    sse1l, loopl, sse2l = lengths[i], int(
                        dfloop['loop_length'].values[0]), lengths[i + 1]
                    total_len = sse1l + loopl + sse2l
                    end_edge = total_len + start - 1
                    edges = {
                        'ini': int(start),
                        'end': int(end_edge),
                        'sse1': int(sse1l),
                        'loop': int(loopl),
                        'sse2': int(sse2l)
                    }
                    self.log.debug(
                        f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}'
                    )
                    self.log.debug(dfloop.to_string())

                    # 8. Bring and Combine fragments from the different sources.
                    loop_data, nfolder = self.make_fragment_files(dfloop,
                                                                  edges,
                                                                  masfile,
                                                                  no_loop=True)
                    loop_data['match_count'] += match_count

                    # 9. Save data in the Case
                    kase.data['metadata']['loop_fragments'].append(loop_data)
                    kase.data['metadata']['loop_lengths'].append(int(loopl))
                    # Next fragment starts right after this SSE and its loop.
                    start += (sse1l + loopl)

                    # 10. Checkpoint save
                    checkpoint = nfolder.joinpath('checkpoint.json')
                    TButil.checkpoint_out(self.log, checkpoint, loop_data)
        return kase