    if reload is not None:
        case.data['metadata']['funfoldes'] = reload
        return case

    # We need to check that the rosetta_scripts executable is available
    if not data['cmd']['folding'][0].is_file() or not os.access(
            str(data['cmd']['folding'][0]), os.X_OK):
        raise IOError('Cannot find executable {}'.format(data['cmd']['folding'][0]))

    # Build the structure
    utils.build_template_sketch(case, wpaths['pdb'])

    # Make the Folding and Design RScripts
    data = utils.make_scripts(case, wpaths, data, natbias, layer_design)

    # Finish command
    data = utils.commands(case, nstruct, data, wpaths)

    # Execute
    data = utils.execute(data, wpaths)

    # Update metadata
    data = utils.update_data(data, wpaths)

    # Checkpoint save
    TButil.checkpoint_out(wpaths['checkpoint'], data)
    case.data['metadata']['funfoldes'] = data
    return case
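
# The steps in this file rely on the checkpoint_in / checkpoint_out pair to
# skip already-completed work on re-execution. A minimal sketch of what such
# helpers could look like, assuming JSON-serializable metadata; these are
# hypothetical stand-ins, not the actual TButil implementations.
import json
from pathlib import Path
from typing import Dict, Optional


def sketch_checkpoint_in(path: Path) -> Optional[Dict]:
    """Return previously saved step data if the checkpoint exists, else None."""
    if path.is_file():
        with path.open() as fd:
            return json.load(fd)
    return None


def sketch_checkpoint_out(path: Path, data: Dict) -> None:
    """Persist step data so a re-run can reload instead of recomputing."""
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open('w') as fd:
        json.dump(data, fd)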
def single_execute(self, data: Dict) -> Dict:
    kase = Case(data)

    # Loop MASTER is only applied to a Case with a single connectivity
    if kase.connectivity_count != 1:
        err = f'{self.nodeID} can only be applied to one connectivity. '
        err += f'Current case contains a total of {kase.connectivity_count}.'
        raise NodeDataError(err)

    # And it has to be reoriented
    if not kase.is_reoriented:
        self.log.debug('Topology was provided without oriented SSE -> orienting.')
        kase = kase.apply_topologies()[0]

    # Generate the folder tree for a single connectivity.
    folders = kase.connectivities_paths[0].joinpath('loopgroup_master')
    folders.mkdir(parents=True, exist_ok=True)

    # Global step distance
    loop_step = kase.cast_absolute()['configuration.defaults.distance.loop_step']

    # Output keys
    kase.data.setdefault('metadata', {}).setdefault('loop_fragments', [])
    kase.data.setdefault('metadata', {}).setdefault('loop_lengths', [])

    # Find steps: each pair of secondary structures.
    # it = kase.connectivities_str[0].split('.')
    # steps = [it[i:i + 2] for i in range(0, len(it) - 1)]
    lengths = kase.connectivity_len[0]
    start = 1

    for i, (group, infos) in enumerate(self.steps.items()):
        self.log.info(f'Search at: {group}')

        # 1. Make folders and files
        wfolder = folders.joinpath(f'loopgroup{i + 1:02d}')
        wfolder.mkdir(parents=True, exist_ok=True)
        outfile = wfolder.joinpath(f'loopgroup_master.iter{i + 1:02d}.pdb')
        outfilePDS = wfolder.joinpath(f'loopgroup_master.iter{i + 1:02d}.pds')
        masfile = outfile.with_suffix('.master')

        gr = self.steps[f'group{i + 1:02d}'][-1].split(';')
        gr = [int(g) for g in gr if g != 'x']
        for g in gr:
            checkpoint = wfolder.joinpath(f'loop{g:02d}/checkpoint.json')

            # 2. Check if checkpoint exists, retrieve and skip
            reload = TButil.checkpoint_in(self.log, checkpoint)
            if reload is not None:
                self.log.debug(f'Reloading loopgroup{i + 1:02d} with loop{g:02d}')
                kase.data['metadata']['loop_fragments'].append(reload)
                kase.data['metadata']['loop_lengths'].append(int(reload['edges']['loop']))
                start += (int(reload['edges']['sse1']) + int(reload['edges']['loop']))
                continue

        # 3. Check hairpin
        # Get SSEs and identifiers
        sses = [kase.get_sse_by_id(sse) for sse in infos[0]]
        # sse1_name, sse2_name = sse1['id'], sse2['id']
        # is_hairpin = self.check_hairpin(sse1_name, sse2_name)

        # 4. Generate structures
        sses = TBstructure.build_pdb_object(self.log, sses, 5,
                                            concat=False, outfile=outfile)

        if not masfile.is_file():
            # 5. Calculate expected loop length by loop_step
            # Mdis, mdis = TBstructure.get_loop_length(self.log, sse1, sse2, loop_step, self.loop_range)

            # 6. Run MASTER
            # outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
            self.log.debug(f'FILE {outfilePDS}')
            # -> make PDS query
            cmd = TBMaster.createPDS(outfile, outfilePDS)
            self.log.debug(f'EXECUTE: {" ".join(cmd)}')
            run(cmd, stdout=DEVNULL)
            # -> run MASTER
            cmd = TBMaster.master_groupedgap(outfilePDS, self.pdsdb, masfile,
                                             infos[1], self.rmsd_cut)
            self.log.debug(f'EXECUTE: {" ".join(cmd)}')
            result = run(cmd, stdout=DEVNULL)

        # TODO: implement motif compatibility
        # if result.returncode:
        #     # No loop between that connection, e.g. a motif spanning
        #     # multiple SSEs while keeping the loops.
        #     # 4. Generate structures
        #     self.log.debug('generate combined structure')
        #     sse = pd.concat([sse1, sse2], sort=False)
        #
        #     # 6. Run MASTER
        #     self.log.debug(Path(outfile))
        #     # outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
        #     self.log.debug(f'FILE {outfilePDS}')
        #     # -> make PDS query
        #     cmd = TBMaster.createPDS(outfile, outfilePDS)
        #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
        #     run(cmd, stdout=DEVNULL)
        #     # -> run MASTER
        #     cmd = TBMaster.master_nogap(outfilePDS, self.pdsdb, masfile, self.rmsd_cut)
        #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
        #     run(cmd, stdout=DEVNULL)
        #
        #     # 6. Minimize master data (pick top_loops x 3 lines to read and minimize the files)
        #     match_count = self.minimize_master_file(masfile)
        #     self.log.debug(f'match count here {match_count}')
        #
        #     # 7. Retrieve MASTER data
        #     dfloop = self.process_master_data_no_gap(masfile, sse1_name, sse2_name)
        #     sse1l, loopl, sse2l = lengths[i], int(dfloop['loop_length'].values[0]), lengths[i + 1]
        #     total_len = sse1l + loopl + sse2l
        #     end_edge = total_len + start - 1
        #     edges = {'ini': int(start), 'end': int(end_edge),
        #              'sse1': int(sse1l), 'loop': int(loopl), 'sse2': int(sse2l)}
        #     self.log.debug(f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}')
        #     self.log.debug(dfloop.to_string())
        #
        #     # 8. Bring and combine fragments from the different sources.
        #     loop_data = self.make_fragment_files(dfloop, edges, masfile)
        #     loop_data['match_count'] += match_count
        # else:

        # 6. Minimize master data (pick top_loops x 3 lines to read and minimize the files)
        match_count = self.minimize_master_file(masfile)

        # 7. Retrieve MASTER data
        df_container = self.process_master_data(masfile, infos[0], infos[1], infos[2])
        for indx in list(df_container.order.drop_duplicates()):
            dfloop = df_container[df_container.order == indx]
            sse1l, loopl, sse2l = lengths[i], int(dfloop['loop_length'].values[0]), lengths[i + 1]
            total_len = sse1l + loopl + sse2l
            end_edge = total_len + start - 1
            edges = {'ini': int(start), 'end': int(end_edge),
                     'sse1': int(sse1l), 'loop': int(loopl), 'sse2': int(sse2l)}
            self.log.debug(f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}')
            self.log.debug(dfloop.to_string())

            # 8. Bring and combine fragments from the different sources.
            loop_data, nfolder = self.make_fragment_files(dfloop, edges, masfile, no_loop=True)
            loop_data['match_count'] += match_count

            # 9. Save data in the Case
            kase.data['metadata']['loop_fragments'].append(loop_data)
            kase.data['metadata']['loop_lengths'].append(int(loopl))
            start += (sse1l + loopl)

            # 10. Checkpoint save
            checkpoint = nfolder.joinpath('checkpoint.json')
            TButil.checkpoint_out(self.log, checkpoint, loop_data)

    return kase
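
# The commented-out step 5 above estimates how many residues a loop needs to
# bridge two SSEs from the configured loop_step. A minimal, hypothetical
# sketch of that idea, assuming loop_step is the distance (in angstroms) one
# loop residue can span and loop_range widens the search window; the actual
# TBstructure.get_loop_length may work differently.
import math
from typing import Tuple


def sketch_loop_length(gap_distance: float, loop_step: float,
                       loop_range: int) -> Tuple[int, int]:
    """Return (max, min) residue counts expected to bridge gap_distance."""
    base = math.ceil(gap_distance / loop_step)
    Mdis = base + loop_range           # longest loop length to try
    mdis = max(base - loop_range, 0)   # shortest loop length to try
    return Mdis, mdis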
    rules = list(zip([sse['id'] for sse in CKase.ordered_structures],
                     list(zip(cstrs, cends)),
                     list(next(flip) for _ in range(len(CKase.ordered_structures)))))
    extras = TButil.pdb_geometry_from_rules(query, rules)

    # MASTER search
    createpds = TButil.createPDS(query)
    TButil.plugin_bash(createpds)
    run(createpds, stdout=DEVNULL)
    masters = TButil.master_best_each(query.with_suffix('.pds'),
                                      stepfolder.joinpath('_master'), rmsd)
    data = submit_searches(masters, stepfolder, current_case_file,
                           '.'.join([x['id'] for x in sses]))
    data = calc_corrections(data, kase, set(data['layers']), done_l, extras, bin)

    kase.data['metadata']['imaster'].setdefault('step{:02d}'.format(i + 1), data)
    TButil.checkpoint_out(checkpoint, data)
    kase.data['metadata']['corrections'].append(data['corrections'])
    done_l.update(data['layers'])
    corrections.update(data['corrections'])
    return kase


def submit_searches(cmd: List[str], wdir: Path, current_case_file: Path,
                    current_sse: str) -> Dict:
    """ """
    unimaster = wdir.joinpath('match.master')
    imaster = Path(__file__).parent.joinpath('imaster.py')
    unidata = wdir.joinpath('geometry.csv')
    if unimaster.is_file() and unidata.is_file():
        return {'matches': unimaster, 'stats': unidata, 'corrections': None,
        if TBcore.get_option('system', 'debug'):
            sys.stdout.write('\nINI: {}; END: {}; SSE1: {}; LOOP: {}; SSE2: {}\n\n'.format(
                start, end_edge, sse1l, loopl, sse2l))
            sys.stdout.write(dfloop.to_string() + '\n')

        # 8. Make Fragments
        loop_data = make_fragment_files(dfloop, edges, masfile)
        loop_data['match_count'] += match_count
        case.data['metadata']['loop_fragments'].append(loop_data)
        case.data['metadata']['loop_lengths'].append(int(loopl))
        start += (sse1l + loopl)

        # Checkpoint save
        TButil.checkpoint_out(checkpoint, loop_data)
    return case


def make_fragment_files(dfloop: pd.DataFrame, edges: Dict, masfile: Path) -> Dict:
    """ """
    data = {
        'loop_length': int(dfloop.iloc[0]['loop_length']),
        'abego': list(dfloop['loop'].values),
        'edges': edges,
        'fragfiles': [],
        'match_count': 0
    }
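    # Field notes (inferred from usage elsewhere in this file): 'loop_length'
    # is the residue count of the top-ranked loop; 'abego' collects the 'loop'
    # column values (ABEGO strings, per the key name); 'edges' holds the
    # residue boundaries computed by the caller; 'fragfiles' is filled as
    # fragment files are written; 'match_count' starts at 0 and is incremented
    # by the caller with the MASTER match count.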
        case.data['metadata']['hybridize'] = reload
        return case

    # We need to check that the rosetta_scripts executable is available
    if not data['cmd']['assembly'][0].is_file() or not os.access(
            str(data['cmd']['assembly'][0]), os.X_OK):
        raise NodeMissingError(f'Cannot find executable {data["cmd"]["assembly"][0]}')

    # Build the structure
    utils.build_template_sketch(self.log, case, wpaths['pdb'])

    # Make the Folding and Design RScripts
    data = utils.make_scripts(self.log, case, wpaths, data, self.natbias, self.layer_design)

    # Finish command
    data = utils.commands(case, self.nstruct, data, wpaths)

    # Execute
    data = utils.execute(self.log, data, wpaths)

    # Update metadata
    data = utils.update_data(self.log, data, wpaths)

    # Checkpoint save
    TButil.checkpoint_out(self.log, wpaths['checkpoint'], data)
    case.data['metadata']['hybridize'] = data
    return case
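
# The executable availability check above appears in both variants
# (is_file + os.X_OK). A minimal, hypothetical helper capturing the same
# pattern; shutil.which is only a fallback for when a bare command name
# rather than a full path is configured. Not part of the original module.
import os
import shutil
from pathlib import Path


def sketch_ensure_executable(exe: Path) -> Path:
    """Return exe if it exists and is executable; otherwise raise IOError."""
    if exe.is_file() and os.access(str(exe), os.X_OK):
        return exe
    found = shutil.which(str(exe))  # resolve bare names through PATH
    if found is not None:
        return Path(found)
    raise IOError(f'Cannot find executable {exe}')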
    # Switch depending on the fragment_protocol
    if self.protocol == 'loop_master':
        frags3, frags9, profile = self.loop_master_protocol(case, folders)
        data['files'] = (frags3, frags9)
        data['profile'] = profile
    elif self.protocol == 'loopgroup_master':
        # data['files'] = self.loopgroup_master_protocol(case, folders)
        frags3, frags9, profile = self.loop_master_protocol(case, folders)
        data['files'] = (frags3, frags9)
        data['profile'] = profile

    # Store data
    case.data['metadata']['fragments'] = data

    # Checkpoint save
    TButil.checkpoint_out(self.log, checkpoint, data)
    return case


def loop_master_protocol(self, case: Case, folders: Path) -> Tuple[str, str, str]:
    """ """
    lf = case['metadata.loop_fragments']
    if lf is None:
        raise NodeMissingError('Data that should be loaded through loop_master is not found.')

    for i, loop in enumerate(lf):
        if i == 0:
            ff3 = parse_rosetta_fragments(loop['fragfiles'][0])
            ff9 = parse_rosetta_fragments(loop['fragfiles'][1])