Beispiel #1
0
        },
        'silent_files': {
            'folding': [],
            'design': []
        },
        'minisilent': {
            'folding': '',
            'design': ''
        }
    }

    # Generate the folder tree for a single connectivity.
    wpaths = utils.folder_structure(case)

    # Check if checkpoint exists, retrieve and skip
    reload = TButil.checkpoint_in(wpaths['checkpoint'])
    if reload is not None:
        case.data['metadata']['funfoldes'] = reload
        return case

    # We need to check that the rosetta_scripts executable is available
    if not data['cmd']['folding'][0].is_file() or not os.access(
            str(data['cmd']['folding'][0]), os.X_OK):
        raise IOError('Cannot find executable {}'.format(
            data['cmd']['folding'][0]))

    # Build the structure
    utils.build_template_sketch(case, wpaths['pdb'])

    # Make the Folding and Design RScripts
    data = utils.make_scripts(case, wpaths, data, natbias, layer_design)
Beispiel #2
0
    def single_execute(self, data: Dict) -> Dict:
        """Search loop fragments for each SSE group using MASTER.

        For every loop group defined in ``self.steps``, builds a PDB query from
        the group's secondary structure elements, converts it to PDS, runs a
        MASTER grouped-gap search against ``self.pdsdb``, and appends the
        resulting loop fragment data and loop lengths to the Case metadata.
        Per-loop checkpoints are reloaded when present.

        :param data: Case-like dictionary holding a single, reoriented
            connectivity.
        :return: The updated Case.
            NOTE(review): annotated ``-> Dict`` but actually returns a
            ``Case`` instance -- confirm against callers.
        :raises NodeDataError: If the Case holds more than one connectivity.
        """
        kase = Case(data)
        # Loop MASTER is only applied to a Case with one single connectivity
        if kase.connectivity_count != 1:
            err = f'{self.nodeID} can only be applied to one connectivity. '
            err += f'Current case contains a total of {kase.connectivity_count}.'
            raise NodeDataError(err)
        # And has to be reoriented
        if not kase.is_reoriented:
            self.log.debug(
                'Topology was provided without oriented SSE -> orienting.')
            kase = kase.apply_topologies()[0]

        # Generate the folder tree for a single connectivity.
        folders = kase.connectivities_paths[0].joinpath('loopgroup_master')
        folders.mkdir(parents=True, exist_ok=True)

        # Global step distance
        # NOTE(review): loop_step is fetched but not used in the visible body
        # (the code that used it is commented out at step 5) -- confirm.
        loop_step = kase.cast_absolute(
        )['configuration.defaults.distance.loop_step']

        # Output keys
        kase.data.setdefault('metadata', {}).setdefault('loop_fragments', [])
        kase.data.setdefault('metadata', {}).setdefault('loop_lengths', [])

        # Find steps: Each pair of secondary structure.
        #it = kase.connectivities_str[0].split('.')
        #steps = [it[i:i + 2] for i in range(0, len(it) - 1)]
        lengths = kase.connectivity_len[0]
        # 'start' tracks the running residue offset of the current group.
        start = 1

        for i, (group, infos) in enumerate(self.steps.items()):
            self.log.info(f'Search at: {group}')

            # 1. Make folders and files
            wfolder = folders.joinpath(f'loopgroup{i + 1:02d}')
            wfolder.mkdir(parents=True, exist_ok=True)
            outfile = wfolder.joinpath(f'loopgroup_master.iter{i + 1:02d}.pdb')
            outfilePDS = wfolder.joinpath(
                f'loopgroup_master.iter{i + 1:02d}.pds')
            masfile = outfile.with_suffix('.master')

            # NOTE(review): assumes step keys are literally named 'groupNN' in
            # enumeration order; equivalent to self.steps[group] only if that
            # naming holds -- confirm.
            gr = self.steps[f'group{i + 1:02d}'][-1].split(';')
            # 'x' entries presumably mark positions without a loop id to
            # checkpoint -- TODO confirm.
            gr = [int(g) for g in gr if g != 'x']
            for g in gr:
                checkpoint = wfolder.joinpath(f'loop{g:02d}/checkpoint.json')
                # 2. Check if checkpoint exists, retrieve and skip
                reload = TButil.checkpoint_in(self.log, checkpoint)
                if reload is not None:
                    self.log.debug(
                        f'Reloading loopgroup{i + 1:02d} with loop{g:02d}')
                    kase.data['metadata']['loop_fragments'].append(reload)
                    kase.data['metadata']['loop_lengths'].append(
                        int(reload['edges']['loop']))
                    start += (int(reload['edges']['sse1']) +
                              int(reload['edges']['loop']))
                    # NOTE(review): this 'continue' only advances the inner
                    # per-loop iteration; the group's MASTER search below still
                    # runs even when every loop was reloaded -- confirm that
                    # is the intended behaviour.
                    continue

            # 3. Check hairpin
            # Get SSEs and identifiers
            sses = [kase.get_sse_by_id(sse) for sse in infos[0]]
            #sse1_name, sse2_name = sse1['id'], sse2['id']
            #is_hairpin = self.check_hairpin(sse1_name, sse2_name)

            # 4. Generate structures
            # NOTE(review): 'sses' is rebound from the id-derived list to the
            # built structure object; the magic constant 5 is passed
            # positionally -- presumably a flank/residue count, confirm in
            # TBstructure.build_pdb_object.
            sses = TBstructure.build_pdb_object(self.log,
                                                sses,
                                                5,
                                                concat=False,
                                                outfile=outfile)

            if not masfile.is_file():
                # 5. calculate expected loop length by loop_step
                #Mdis, mdis = TBstructure.get_loop_length(self.log, sse1, sse2, loop_step, self.loop_range)

                # 6. Run MASTER
                #outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
                self.log.debug(f'FILE {outfilePDS}')
                # -> make PDS query
                cmd = TBMaster.createPDS(outfile, outfilePDS)
                self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                run(cmd, stdout=DEVNULL)
                # -> run MASTER
                cmd = TBMaster.master_groupedgap(outfilePDS, self.pdsdb,
                                                 masfile, infos[1],
                                                 self.rmsd_cut)
                self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                # NOTE(review): 'result' is captured but its returncode is only
                # used in the commented-out motif branch below.
                result = run(cmd, stdout=DEVNULL)

                # TODO: implement motif compability
                # if result.returncode: # no loop between that connection, e.g. a motif ranging over multiple sse with keeping the loops
                #     # 4. Generate structures
                #     self.log.debug('generate combined structure')
                #     sse = pd.concat([sse1, sse2], sort=False)
                #
                #     # 6. Run MASTER
                #     self.log.debug(Path(outfile))
                #     #outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
                #     self.log.debug(f'FILE {outfilePDS}')
                #     # -> make PDS query
                #     cmd = TBMaster.createPDS(outfile, outfilePDS)
                #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                #     run(cmd, stdout=DEVNULL)
                #     # -> run MASTER
                #     cmd = TBMaster.master_nogap(outfilePDS, self.pdsdb, masfile, self.rmsd_cut)
                #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                #     run(cmd, stdout=DEVNULL)
                #
                #     # 6. Minimize master data (pick top_loopsx3 lines to read and minimize the files)
                #     match_count = self.minimize_master_file(masfile)
                #     self.log.debug(f'match count here {match_count}')
                #
                #     # 7. Retrieve MASTER data
                #     dfloop = self.process_master_data_no_gap(masfile, sse1_name, sse2_name)
                #     sse1l, loopl, sse2l = lengths[i], int(dfloop['loop_length'].values[0]), lengths[i + 1]
                #     total_len = sse1l + loopl + sse2l
                #     end_edge = total_len + start - 1
                #     edges = {'ini': int(start), 'end': int(end_edge), 'sse1': int(sse1l), 'loop': int(loopl), 'sse2': int(sse2l)}
                #     self.log.debug(f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}')
                #     self.log.debug(dfloop.to_string())
                #
                #     # 8. Bring and Combine fragments from the different sources.
                #     loop_data = self.make_fragment_files(dfloop, edges, masfile)
                #     loop_data['match_count'] += match_count

                #else:

                # 6. Minimize master data (pick top_loopsx3 lines to read and minimize the files)
                match_count = self.minimize_master_file(masfile)
                # 7. Retrieve MASTER data
                df_container = self.process_master_data(
                    masfile, infos[0], infos[1], infos[2])

                # One fragment set per distinct match order in the container.
                for indx in list(df_container.order.drop_duplicates()):
                    dfloop = df_container[df_container.order == indx]
                    sse1l, loopl, sse2l = lengths[i], int(
                        dfloop['loop_length'].values[0]), lengths[i + 1]
                    total_len = sse1l + loopl + sse2l
                    end_edge = total_len + start - 1
                    edges = {
                        'ini': int(start),
                        'end': int(end_edge),
                        'sse1': int(sse1l),
                        'loop': int(loopl),
                        'sse2': int(sse2l)
                    }
                    self.log.debug(
                        f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}'
                    )
                    self.log.debug(dfloop.to_string())

                    # 8. Bring and Combine fragments from the different sources.
                    loop_data, nfolder = self.make_fragment_files(dfloop,
                                                                  edges,
                                                                  masfile,
                                                                  no_loop=True)
                    loop_data['match_count'] += match_count

                    # 9. Save data in the Case
                    kase.data['metadata']['loop_fragments'].append(loop_data)
                    kase.data['metadata']['loop_lengths'].append(int(loopl))
                    start += (sse1l + loopl)

                    # 10. Checkpoint save
                    checkpoint = nfolder.joinpath('checkpoint.json')
                    TButil.checkpoint_out(self.log, checkpoint, loop_data)
        return kase
Beispiel #3
0
class hybridize(Node):
    """Run Rosetta's hybridize protocol to generate designs.

    .. caution::
        Due to the ``FastDesignMover``, this :class:`hybridize` may take *a lot* of time. If possible, please
        use the ``slurm.use`` configuration. In case this is not possible, you may reduce the number of decoys
        to be generated via the `nstruct` parameter option.


    :param nstruct: Number of decoys to be generated (default: 2000).
    :param natbias: Score function bias towards per secondary structure types (default: 2.5).
    :param layer_design: If :class:`hybridize` should use a layer design approach (default: True).

    :raises:
        :NodeDataError: On **check**. If the required fields to be executed are not there.
        :NodeMissingError: On **exection**. If required variable inputs are not there.
    """
    REQUIRED_FIELDS = ('metadata.fragments', 'metadata.loop_lengths')
    # BUGFIX: trailing comma added -- '(x)' is just a parenthesized string,
    # not a one-element tuple, which was inconsistent with REQUIRED_FIELDS
    # and would iterate character-by-character.
    RETURNED_FIELDS = ('metadata.hybridize', )
    VERSION = 'v1.0'

    def __init__(self,
                 tag: int,
                 nstruct: int = 2000,
                 natbias: float = 2.5,
                 layer_design: bool = True) -> None:
        # BUGFIX (annotations only): __init__ returns None, not Case; and the
        # Optional[...] hints were dropped because the defaults are non-None
        # and None is not a meaningful value for these parameters.
        super(hybridize, self).__init__(tag)

        self.nstruct = nstruct            # number of decoys to generate
        self.natbias = natbias            # per-SSE-type score-function bias
        self.layer_design = layer_design  # apply the layer design approach

    def single_check(self, dummy: Dict) -> Dict:
        """Verify the required metadata fields exist and register the output key.

        :param dummy: Case-like dictionary to validate.
        :return: The validated Case data with ``metadata.hybridize`` ensured.
        :raises NodeDataError: If any field in ``REQUIRED_FIELDS`` is missing.
        """
        kase = Case(dummy)

        # Check what it needs
        for itag in self.REQUIRED_FIELDS:
            if kase[itag] is None:
                raise NodeDataError(f'Field "{itag}" is required')

        # Include what keywords it adds (in this instance, nothing)
        kase.data.setdefault('metadata', {}).setdefault('hybridize', {})
        return kase.data

    def single_execute(self, data: Dict) -> Dict:
        """Run the hybridize protocol for a single connectivity.

        Builds the per-stage bookkeeping structure (scripts, command lines,
        silent files) and short-circuits by returning the Case when a
        checkpoint from a previous run is found.
        """
        case = Case(data)

        # Scaffold for per-stage artifacts: RosettaScripts XML bodies,
        # command lines, produced silent files and their minimized versions.
        data = {
            'script': {
                'assembly': '',
                'design': ''
            },
            'cmd': {
                'assembly': [
                    Path(TBcore.get_option('rosetta', 'scripts')),
                    '-parser:protocol'
                ],
                'design': [
                    Path(TBcore.get_option('rosetta', 'scripts')),
                    '-parser:protocol'
                ]
            },
            'silent_files': {
                'assembly': [],
                'design': []
            },
            'minisilent': {
                'assembly': '',
                'design': ''
            }
        }

        # Generate the folder tree for a single connectivity.
        wpaths = utils.folder_structure(case)

        # Check if checkpoint exists, retrieve and skip
        reload = TButil.checkpoint_in(self.log, wpaths['checkpoint'])
        if reload is not None:
            case.data['metadata']['hybridize'] = reload
            return case
        # NOTE(review): the method appears truncated here -- when no
        # checkpoint is found it falls through returning None and the built
        # 'data' scaffold is unused; confirm the remainder of the protocol.
Beispiel #4
0
    current_case_file = kase.cast_absolute().write(wfolder.joinpath('current'))

    # Find steps: Tops we will submit 2-layer searches
    steps = get_steps([x[-1] == 'E' for x in kase.architecture_str.split('.')])
    steps = [steps[step], ] if step is not None and TBcore.get_option('system', 'jupyter') else steps

    # Work by layers
    done_l = set()
    for i, step in enumerate(steps):
        # Step working directory
        stepfolder = wfolder.joinpath('step{:02d}'.format(i + 1))
        stepfolder.mkdir(parents=True, exist_ok=True)
        query = stepfolder.joinpath('imaster.query{:02d}.pdb'.format(i + 1))
        checkpoint = stepfolder.joinpath('checkpoint.json')

        reload = TButil.checkpoint_in(checkpoint)
        if reload is not None:
            kase.data['metadata']['imaster'].setdefault('step{:02d}'.format(i + 1), reload)
            kase.data['metadata']['corrections'].append(reload['corrections'])
            corrections.update(reload['corrections'])
            done_l.update(reload['layers'])
            # CKase = CKase.apply_corrections(corrections)
            continue

        # Apply corrections from previous steps and rebuild
        CKase = Case(kase).apply_corrections(corrections)
        with TBcore.on_option_value('system', 'overwrite', True):
            CKase = plugin_source.load_plugin('builder').case_apply(CKase, connectivity=True)

        # Generate structure query and get layer displacements
        layers = set(itemgetter(*step)(ascii_uppercase))
Beispiel #5
0
    def single_execute(self, data: Dict) -> Dict:
        case = Case(data)
        data = {'protocol': self.protocol, 'files': []}

        # Fragments can only be made for a full, reoriented Case.
        if case.connectivity_count > 1:
            raise NodeDataError(
                'FunFolDes can only be applied to one connectivity.')

        # Generate the folder tree for a single connectivity.
        folders = case.connectivities_paths[0].joinpath('fragment_maker')
        folders.mkdir(parents=True, exist_ok=True)
        checkpoint = folders.joinpath('checkpoint.json')

        # Check if checkpoint exists, retrieve and skip
        reload = TButil.checkpoint_in(self.log, checkpoint)
        if reload is not None and reload['protocol'] == self.protocol:
            case.data['metadata']['fragments'] = reload
            return case

        # Switch depending on the fragment_protocol
        if self.protocol == 'loop_master':
            frags3, frags9, profile = self.loop_master_protocol(case, folders)
            data['files'] = (frags3, frags9)
            data['profile'] = profile
        if self.protocol == 'loopgroup_master':
            #data['files'] = self.loopgroup_master_protocol(case, folders)
            frags3, frags9, profile = self.loop_master_protocol(case, folders)
            data['files'] = (frags3, frags9)
            data['profile'] = profile