}, 'silent_files': { 'folding': [], 'design': [] }, 'minisilent': { 'folding': '', 'design': '' } } # Generate the folder tree for a single connectivity. wpaths = utils.folder_structure(case) # Check if checkpoint exists, retrieve and skip reload = TButil.checkpoint_in(wpaths['checkpoint']) if reload is not None: case.data['metadata']['funfoldes'] = reload return case # We need to check that the rosetta_scripts executable is available if not data['cmd']['folding'][0].is_file() or not os.access( str(data['cmd']['folding'][0]), os.X_OK): raise IOError('Cannot find executable {}'.format( data['cmd']['folding'][0])) # Build the structure utils.build_template_sketch(case, wpaths['pdb']) # Make the Folding and Design RScripts data = utils.make_scripts(case, wpaths, data, natbias, layer_design)
def single_execute(self, data: Dict) -> Dict:
    """Run grouped-loop MASTER searches for a single-connectivity Case.

    For each loop group declared in ``self.steps``, builds a query PDB from
    the group's SSEs, runs MASTER against ``self.pdsdb``, and turns the
    matches into loop fragment files, recording them (plus loop lengths)
    under ``metadata.loop_fragments`` / ``metadata.loop_lengths``.

    :param data: Serialized Case data (dict form).
    :return: The updated Case (note: returns the Case object, not its dict).
    :raises NodeDataError: If the Case carries more than one connectivity.

    NOTE(review): the original file's indentation was lost during
    extraction; the nesting below (checkpoint-reload per loop inside the
    ``for g in gr`` loop, group-level processing afterwards) is a
    reconstruction — confirm against version control.
    """
    kase = Case(data)

    # Loop MASTER is only applied to a Case with one single connectivity
    if kase.connectivity_count != 1:
        err = f'{self.nodeID} can only be applied to one connectivity. '
        err += f'Current case contains a total of {kase.connectivity_count}.'
        raise NodeDataError(err)

    # And has to be reoriented: orientation of SSEs is required before
    # fragment placement.
    if not kase.is_reoriented:
        self.log.debug(
            'Topology was provided without oriented SSE -> orienting.')
        kase = kase.apply_topologies()[0]

    # Generate the folder tree for a single connectivity.
    folders = kase.connectivities_paths[0].joinpath('loopgroup_master')
    folders.mkdir(parents=True, exist_ok=True)

    # Global step distance (loop-length granularity from configuration).
    loop_step = kase.cast_absolute(
    )['configuration.defaults.distance.loop_step']

    # Output keys: ensure the metadata lists exist before appending.
    kase.data.setdefault('metadata', {}).setdefault('loop_fragments', [])
    kase.data.setdefault('metadata', {}).setdefault('loop_lengths', [])

    # Find steps: Each pair of secondary structure.
    #it = kase.connectivities_str[0].split('.')
    #steps = [it[i:i + 2] for i in range(0, len(it) - 1)]
    lengths = kase.connectivity_len[0]
    start = 1  # running residue index of the current fragment window

    for i, (group, infos) in enumerate(self.steps.items()):
        self.log.info(f'Search at: {group}')

        # 1. Make folders and files for this loop group.
        wfolder = folders.joinpath(f'loopgroup{i + 1:02d}')
        wfolder.mkdir(parents=True, exist_ok=True)
        outfile = wfolder.joinpath(f'loopgroup_master.iter{i + 1:02d}.pdb')
        outfilePDS = wfolder.joinpath(
            f'loopgroup_master.iter{i + 1:02d}.pds')
        masfile = outfile.with_suffix('.master')

        # Loop identifiers for the group; 'x' entries are placeholders.
        gr = self.steps[f'group{i + 1:02d}'][-1].split(';')
        gr = [int(g) for g in gr if g != 'x']
        for g in gr:
            checkpoint = wfolder.joinpath(f'loop{g:02d}/checkpoint.json')

            # 2. Check if checkpoint exists, retrieve and skip this loop.
            reload = TButil.checkpoint_in(self.log, checkpoint)
            if reload is not None:
                self.log.debug(
                    f'Reloading loopgroup{i + 1:02d} with loop{g:02d}')
                kase.data['metadata']['loop_fragments'].append(reload)
                kase.data['metadata']['loop_lengths'].append(
                    int(reload['edges']['loop']))
                # Advance the residue window past the reloaded fragment.
                start += (int(reload['edges']['sse1'])
                          + int(reload['edges']['loop']))
                continue

        # 3. Check hairpin
        # Get SSEs and identifiers
        sses = [kase.get_sse_by_id(sse) for sse in infos[0]]
        #sse1_name, sse2_name = sse1['id'], sse2['id']
        #is_hairpin = self.check_hairpin(sse1_name, sse2_name)

        # 4. Generate structures (query PDB for the MASTER search).
        sses = TBstructure.build_pdb_object(self.log,
                                            sses,
                                            5,
                                            concat=False,
                                            outfile=outfile)

        if not masfile.is_file():
            # 5. calculate expected loop length by loop_step
            #Mdis, mdis = TBstructure.get_loop_length(self.log, sse1, sse2, loop_step, self.loop_range)

            # 6. Run MASTER
            #outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
            self.log.debug(f'FILE {outfilePDS}')
            # -> make PDS query
            cmd = TBMaster.createPDS(outfile, outfilePDS)
            self.log.debug(f'EXECUTE: {" ".join(cmd)}')
            run(cmd, stdout=DEVNULL)
            # -> run MASTER with grouped gap constraints
            cmd = TBMaster.master_groupedgap(outfilePDS, self.pdsdb,
                                             masfile, infos[1],
                                             self.rmsd_cut)
            self.log.debug(f'EXECUTE: {" ".join(cmd)}')
            result = run(cmd, stdout=DEVNULL)

            # TODO: implement motif compability
            # if result.returncode:  # no loop between that connection, e.g. a motif ranging over multiple sse with keeping the loops
            #     # 4. Generate structures
            #     self.log.debug('generate combined structure')
            #     sse = pd.concat([sse1, sse2], sort=False)
            #
            #     # 6. Run MASTER
            #     self.log.debug(Path(outfile))
            #     #outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
            #     self.log.debug(f'FILE {outfilePDS}')
            #     # -> make PDS query
            #     cmd = TBMaster.createPDS(outfile, outfilePDS)
            #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
            #     run(cmd, stdout=DEVNULL)
            #     # -> run MASTER
            #     cmd = TBMaster.master_nogap(outfilePDS, self.pdsdb, masfile, self.rmsd_cut)
            #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
            #     run(cmd, stdout=DEVNULL)
            #
            #     # 6. Minimize master data (pick top_loopsx3 lines to read and minimize the files)
            #     match_count = self.minimize_master_file(masfile)
            #     self.log.debug(f'match count here {match_count}')
            #
            #     # 7. Retrieve MASTER data
            #     dfloop = self.process_master_data_no_gap(masfile, sse1_name, sse2_name)
            #     sse1l, loopl, sse2l = lengths[i], int(dfloop['loop_length'].values[0]), lengths[i + 1]
            #     total_len = sse1l + loopl + sse2l
            #     end_edge = total_len + start - 1
            #     edges = {'ini': int(start), 'end': int(end_edge), 'sse1': int(sse1l), 'loop': int(loopl), 'sse2': int(sse2l)}
            #     self.log.debug(f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}')
            #     self.log.debug(dfloop.to_string())
            #
            #     # 8. Bring and Combine fragments from the different sources.
            #     loop_data = self.make_fragment_files(dfloop, edges, masfile)
            #     loop_data['match_count'] += match_count
            #else:

        # 6. Minimize master data (pick top_loopsx3 lines to read and
        #    minimize the files).
        match_count = self.minimize_master_file(masfile)

        # 7. Retrieve MASTER data, one result frame per loop in the group.
        df_container = self.process_master_data(
            masfile, infos[0], infos[1], infos[2])
        for indx in list(df_container.order.drop_duplicates()):
            dfloop = df_container[df_container.order == indx]
            sse1l, loopl, sse2l = lengths[i], int(
                dfloop['loop_length'].values[0]), lengths[i + 1]
            total_len = sse1l + loopl + sse2l
            end_edge = total_len + start - 1
            edges = {
                'ini': int(start),
                'end': int(end_edge),
                'sse1': int(sse1l),
                'loop': int(loopl),
                'sse2': int(sse2l)
            }
            self.log.debug(
                f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}'
            )
            self.log.debug(dfloop.to_string())

            # 8. Bring and Combine fragments from the different sources.
            loop_data, nfolder = self.make_fragment_files(dfloop,
                                                          edges,
                                                          masfile,
                                                          no_loop=True)
            loop_data['match_count'] += match_count

            # 9. Save data in the Case
            kase.data['metadata']['loop_fragments'].append(loop_data)
            kase.data['metadata']['loop_lengths'].append(int(loopl))
            start += (sse1l + loopl)

            # 10. Checkpoint save so a rerun can skip this loop.
            checkpoint = nfolder.joinpath('checkpoint.json')
            TButil.checkpoint_out(self.log, checkpoint, loop_data)

    return kase
class hybridize(Node):
    """Run Rosetta's hybridize protocol to generate designs.

    .. caution::
        Due to the ``FastDesignMover``, this :class:`hybridize` may take
        *a lot* of time. If possible, please use the ``slurm.use``
        configuration. In case this is not possible, you may reduce the
        number of decoys to be generated via the `nstruct` parameter option.

    :param nstruct: Number of decoys to be generated (default: 2000).
    :param natbias: Score function bias towards per secondary structure
        types (default: 2.5).
    :param layer_design: If :class:`hybridize` should use a layer design
        approach (default: True).

    :raises:
        :NodeDataError: On **check**. If the required fields to be executed
            are not there.
        :NodeMissingError: On **exection**. If required variable inputs are
            not there.
    """
    REQUIRED_FIELDS = ('metadata.fragments', 'metadata.loop_lengths')
    # NOTE(review): missing trailing comma — this is a plain str, not a
    # 1-tuple like REQUIRED_FIELDS. Iterating it yields characters; confirm
    # and fix upstream.
    RETURNED_FIELDS = ('metadata.hybridize')
    VERSION = 'v1.0'

    def __init__(self,
                 tag: int,
                 nstruct: Optional[int] = 2000,
                 natbias: Optional[float] = 2.5,
                 layer_design: Optional[bool] = True) -> None:
        super(hybridize, self).__init__(tag)
        self.nstruct = nstruct
        self.natbias = natbias
        self.layer_design = layer_design

    def single_check(self, dummy: Dict) -> Dict:
        """Verify required metadata fields and declare the added key."""
        kase = Case(dummy)

        # Check what it needs
        for itag in self.REQUIRED_FIELDS:
            if kase[itag] is None:
                raise NodeDataError(f'Field "{itag}" is required')

        # Include what keywords it adds (in this instance, nothing)
        kase.data.setdefault('metadata', {}).setdefault('hybridize', {})
        return kase.data

    def single_execute(self, data: Dict) -> Dict:
        """Set up and run the hybridize protocol for one connectivity."""
        case = Case(data)

        # Per-run bookkeeping: RosettaScripts XML bodies, command prefixes,
        # and the silent-file outputs for the assembly and design stages.
        data = {
            'script': {
                'assembly': '',
                'design': ''
            },
            'cmd': {
                'assembly': [
                    Path(TBcore.get_option('rosetta', 'scripts')),
                    '-parser:protocol'
                ],
                'design': [
                    Path(TBcore.get_option('rosetta', 'scripts')),
                    '-parser:protocol'
                ]
            },
            'silent_files': {
                'assembly': [],
                'design': []
            },
            'minisilent': {
                'assembly': '',
                'design': ''
            }
        }

        # Generate the folder tree for a single connectivity.
        wpaths = utils.folder_structure(case)

        # Check if checkpoint exists, retrieve and skip
        reload = TButil.checkpoint_in(self.log, wpaths['checkpoint'])
        if reload is not None:
            case.data['metadata']['hybridize'] = reload
            return case
        # NOTE(review): the remainder of this method (executable check,
        # script generation — cf. the sibling folding variant) appears to
        # be missing from this chunk; confirm against version control.
current_case_file = kase.cast_absolute().write(wfolder.joinpath('current')) # Find steps: Tops we will submit 2-layer searches steps = get_steps([x[-1] == 'E' for x in kase.architecture_str.split('.')]) steps = [steps[step], ] if step is not None and TBcore.get_option('system', 'jupyter') else steps # Work by layers done_l = set() for i, step in enumerate(steps): # Step working directory stepfolder = wfolder.joinpath('step{:02d}'.format(i + 1)) stepfolder.mkdir(parents=True, exist_ok=True) query = stepfolder.joinpath('imaster.query{:02d}.pdb'.format(i + 1)) checkpoint = stepfolder.joinpath('checkpoint.json') reload = TButil.checkpoint_in(checkpoint) if reload is not None: kase.data['metadata']['imaster'].setdefault('step{:02d}'.format(i + 1), reload) kase.data['metadata']['corrections'].append(reload['corrections']) corrections.update(reload['corrections']) done_l.update(reload['layers']) # CKase = CKase.apply_corrections(corrections) continue # Apply corrections from previous steps and rebuild CKase = Case(kase).apply_corrections(corrections) with TBcore.on_option_value('system', 'overwrite', True): CKase = plugin_source.load_plugin('builder').case_apply(CKase, connectivity=True) # Generate structure query and get layer displacements layers = set(itemgetter(*step)(ascii_uppercase))
def single_execute(self, data: Dict) -> Dict: case = Case(data) data = {'protocol': self.protocol, 'files': []} # Fragments can only be made for a full, reoriented Case. if case.connectivity_count > 1: raise NodeDataError( 'FunFolDes can only be applied to one connectivity.') # Generate the folder tree for a single connectivity. folders = case.connectivities_paths[0].joinpath('fragment_maker') folders.mkdir(parents=True, exist_ok=True) checkpoint = folders.joinpath('checkpoint.json') # Check if checkpoint exists, retrieve and skip reload = TButil.checkpoint_in(self.log, checkpoint) if reload is not None and reload['protocol'] == self.protocol: case.data['metadata']['fragments'] = reload return case # Switch depending on the fragment_protocol if self.protocol == 'loop_master': frags3, frags9, profile = self.loop_master_protocol(case, folders) data['files'] = (frags3, frags9) data['profile'] = profile if self.protocol == 'loopgroup_master': #data['files'] = self.loopgroup_master_protocol(case, folders) frags3, frags9, profile = self.loop_master_protocol(case, folders) data['files'] = (frags3, frags9) data['profile'] = profile