def test_topology(self):
    c = Case('test_topology')
    c = c.add_topology('A2E.A1E.B1H.A3E.B2H.A5E.A4E')
    assert c.shape == (5, 2)
    assert c.shape_len == ((7, 7, 7, 7, 7), (13, 13))
    assert c.architecture_str == '5E.2H'
    assert c.connectivity_count == 1
    assert c.connectivities_str == ('A2E.A1E.B1H.A3E.B2H.A5E.A4E', )

    # Re-adding the same topology must not create a new connectivity, and a
    # topology that does not match the existing architecture must be rejected.
    c = c.add_topology('A2E.A1E.B1H.A3E.B2H.A5E.A4E')
    with pytest.raises(ValidationError) as message:
        c = c.add_topology('A2E.A1E.B1H.A3E.B2H.A5E.A4E.B3H')
    assert message.value.messages == ['Provided topology does not match existing architecture.']
    assert c.connectivity_count == 1
    assert c.connectivities_str == ('A2E.A1E.B1H.A3E.B2H.A5E.A4E', )

    cs = c.apply_topologies()
    assert len(cs) == 1
    assert cs[0].shape_len == c.shape_len
    assert cs[0].shape == c.shape
    assert cs[0].connectivities_str == c.connectivities_str

    # Multiple topologies can be attached to a single architecture.
    c = Case('test_topology')
    c = c.add_architecture('5E:8:8:7:7:7.2H:18:19')
    c = c.add_topology('A2E8.A1E8.B1H18.A3E7.B2H19.A5E7.A4E7')
    c = c.add_topology('A2E8.A1E8.B1H18.A3E7.B2H19.A4E7.A5E7')
    c = c.add_topology('A1E8.A2E8.B1H18.A3E7.B2H19.A4E7.A5E7')
    assert c.shape == (5, 2)
    assert c.shape_len == ((8, 8, 7, 7, 7), (18, 19))
    assert c.architecture_str == '5E.2H'
    assert c.connectivity_count == 3
    assert c.connectivities_str == ('A2E.A1E.B1H.A3E.B2H.A5E.A4E',
                                    'A2E.A1E.B1H.A3E.B2H.A4E.A5E',
                                    'A1E.A2E.B1H.A3E.B2H.A4E.A5E')

    # apply_topologies splits the Case into one Case per connectivity.
    cs = c.apply_topologies()
    assert len(cs) == 3
    assert cs[0].connectivity_count == 1
    assert cs[0].connectivities_str == ('A2E.A1E.B1H.A3E.B2H.A5E.A4E', )
    assert cs[1].connectivity_count == 1
    assert cs[1].connectivities_str == ('A2E.A1E.B1H.A3E.B2H.A4E.A5E', )
    assert cs[2].connectivity_count == 1
    assert cs[2].connectivities_str == ('A1E.A2E.B1H.A3E.B2H.A4E.A5E', )

    # Sketch each resulting connectivity side by side.
    fig = plt.figure(figsize=(15, 5))
    ax1 = plt.subplot2grid((1, 3), (0, 0), fig=fig)
    plot_case_sketch(cs[0], ax1)
    ax2 = plt.subplot2grid((1, 3), (0, 1), fig=fig)
    plot_case_sketch(cs[1], ax2)
    ax3 = plt.subplot2grid((1, 3), (0, 2), fig=fig)
    plot_case_sketch(cs[2], ax3)
    return fig
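# The topology strings above follow a compact convention that the test only
# exercises implicitly: each dot-separated token appears to name one secondary
# structure element as <layer><position><type>[<length>], e.g. 'A2E8' = strand
# (E) number 2 in layer A, 8 residues long. The standalone sketch below decodes
# that format; it is an illustration inferred from the test data, not the Case
# class's actual parser, and the token semantics are an assumption.
import re
from typing import List, NamedTuple, Optional


class SSEToken(NamedTuple):
    layer: str             # layer identifier: 'A', 'B', ...
    position: int          # position of the SSE within its layer
    sse_type: str          # 'E' for strand, 'H' for helix
    length: Optional[int]  # residue count when present (e.g. 'A2E8'), else None


def parse_topology(topology: str) -> List[SSEToken]:
    """Split a dot-separated topology string into per-SSE tokens."""
    pattern = re.compile(r'^([A-Z])(\d+)([EH])(\d*)$')
    tokens = []
    for part in topology.split('.'):
        match = pattern.match(part)
        if match is None:
            raise ValueError(f'Unrecognized SSE token: {part!r}')
        layer, position, sse_type, length = match.groups()
        tokens.append(SSEToken(layer, int(position), sse_type,
                               int(length) if length else None))
    return tokens


# parse_topology('A2E8.A1E8.B1H18')[:2]
# -> [SSEToken(layer='A', position=2, sse_type='E', length=8),
#     SSEToken(layer='A', position=1, sse_type='E', length=8)]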
def single_execute(self, data: Dict) -> Dict:
    kase = Case(data)

    # Loop MASTER is only applied to a Case with one single connectivity.
    if kase.connectivity_count != 1:
        err = f'{self.nodeID} can only be applied to one connectivity. '
        err += f'Current case contains a total of {kase.connectivity_count}.'
        raise NodeDataError(err)

    # And it has to be reoriented.
    if not kase.is_reoriented:
        self.log.debug('Topology was provided without oriented SSE -> orienting.')
        kase = kase.apply_topologies()[0]

    # Generate the folder tree for a single connectivity.
    folders = kase.connectivities_paths[0].joinpath('loopgroup_master')
    folders.mkdir(parents=True, exist_ok=True)

    # Global step distance.
    loop_step = kase.cast_absolute()['configuration.defaults.distance.loop_step']

    # Output keys.
    kase.data.setdefault('metadata', {}).setdefault('loop_fragments', [])
    kase.data.setdefault('metadata', {}).setdefault('loop_lengths', [])

    # Find steps: each pair of secondary structure elements.
    # it = kase.connectivities_str[0].split('.')
    # steps = [it[i:i + 2] for i in range(0, len(it) - 1)]
    lengths = kase.connectivity_len[0]
    start = 1

    for i, (group, infos) in enumerate(self.steps.items()):
        self.log.info(f'Search at: {group}')

        # 1. Make folders and files.
        wfolder = folders.joinpath(f'loopgroup{i + 1:02d}')
        wfolder.mkdir(parents=True, exist_ok=True)
        outfile = wfolder.joinpath(f'loopgroup_master.iter{i + 1:02d}.pdb')
        outfilePDS = wfolder.joinpath(f'loopgroup_master.iter{i + 1:02d}.pds')
        masfile = outfile.with_suffix('.master')

        gr = self.steps[f'group{i + 1:02d}'][-1].split(';')
        gr = [int(g) for g in gr if g != 'x']
        for g in gr:
            checkpoint = wfolder.joinpath(f'loop{g:02d}/checkpoint.json')

            # 2. Check if a checkpoint exists; if so, retrieve it and skip.
            reload = TButil.checkpoint_in(self.log, checkpoint)
            if reload is not None:
                self.log.debug(f'Reloading loopgroup{i + 1:02d} with loop{g:02d}')
                kase.data['metadata']['loop_fragments'].append(reload)
                kase.data['metadata']['loop_lengths'].append(int(reload['edges']['loop']))
                start += (int(reload['edges']['sse1']) + int(reload['edges']['loop']))
                continue

        # 3. Check hairpin: get SSEs and identifiers.
        sses = [kase.get_sse_by_id(sse) for sse in infos[0]]
        # sse1_name, sse2_name = sse1['id'], sse2['id']
        # is_hairpin = self.check_hairpin(sse1_name, sse2_name)

        # 4. Generate structures.
        sses = TBstructure.build_pdb_object(self.log, sses, 5, concat=False, outfile=outfile)

        if not masfile.is_file():
            # 5. Calculate the expected loop length by loop_step.
            # Mdis, mdis = TBstructure.get_loop_length(self.log, sse1, sse2, loop_step, self.loop_range)

            # 6. Run MASTER.
            # outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
            self.log.debug(f'FILE {outfilePDS}')
            # -> make PDS query
            cmd = TBMaster.createPDS(outfile, outfilePDS)
            self.log.debug(f'EXECUTE: {" ".join(cmd)}')
            run(cmd, stdout=DEVNULL)
            # -> run MASTER
            cmd = TBMaster.master_groupedgap(outfilePDS, self.pdsdb, masfile, infos[1], self.rmsd_cut)
            self.log.debug(f'EXECUTE: {" ".join(cmd)}')
            result = run(cmd, stdout=DEVNULL)

        # TODO: implement motif compatibility
        # if result.returncode:
        #     # No loop between that connection, e.g. a motif ranging over
        #     # multiple SSEs while keeping the loops.
        #     # 4. Generate structures
        #     self.log.debug('generate combined structure')
        #     sse = pd.concat([sse1, sse2], sort=False)
        #
        #     # 6. Run MASTER
        #     self.log.debug(Path(outfile))
        #     # outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
        #     self.log.debug(f'FILE {outfilePDS}')
        #     # -> make PDS query
        #     cmd = TBMaster.createPDS(outfile, outfilePDS)
        #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
        #     run(cmd, stdout=DEVNULL)
        #     # -> run MASTER
        #     cmd = TBMaster.master_nogap(outfilePDS, self.pdsdb, masfile, self.rmsd_cut)
        #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
        #     run(cmd, stdout=DEVNULL)
        #
        #     # 6. Minimize master data (pick top_loops x 3 lines to read and minimize the files)
        #     match_count = self.minimize_master_file(masfile)
        #     self.log.debug(f'match count here {match_count}')
        #
        #     # 7. Retrieve MASTER data
        #     dfloop = self.process_master_data_no_gap(masfile, sse1_name, sse2_name)
        #     sse1l, loopl, sse2l = lengths[i], int(dfloop['loop_length'].values[0]), lengths[i + 1]
        #     total_len = sse1l + loopl + sse2l
        #     end_edge = total_len + start - 1
        #     edges = {'ini': int(start), 'end': int(end_edge), 'sse1': int(sse1l),
        #              'loop': int(loopl), 'sse2': int(sse2l)}
        #     self.log.debug(f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}')
        #     self.log.debug(dfloop.to_string())
        #
        #     # 8. Bring and combine fragments from the different sources.
        #     loop_data = self.make_fragment_files(dfloop, edges, masfile)
        #     loop_data['match_count'] += match_count
        # else:

        # 6. Minimize master data (pick top_loops x 3 lines to read and minimize the files).
        match_count = self.minimize_master_file(masfile)

        # 7. Retrieve MASTER data.
        df_container = self.process_master_data(masfile, infos[0], infos[1], infos[2])
        for indx in list(df_container.order.drop_duplicates()):
            dfloop = df_container[df_container.order == indx]
            sse1l, loopl, sse2l = lengths[i], int(dfloop['loop_length'].values[0]), lengths[i + 1]
            total_len = sse1l + loopl + sse2l
            end_edge = total_len + start - 1
            edges = {'ini': int(start), 'end': int(end_edge), 'sse1': int(sse1l),
                     'loop': int(loopl), 'sse2': int(sse2l)}
            self.log.debug(f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}')
            self.log.debug(dfloop.to_string())

            # 8. Bring and combine fragments from the different sources.
            loop_data, nfolder = self.make_fragment_files(dfloop, edges, masfile, no_loop=True)
            loop_data['match_count'] += match_count

            # 9. Save data in the Case.
            kase.data['metadata']['loop_fragments'].append(loop_data)
            kase.data['metadata']['loop_lengths'].append(int(loopl))
            start += (sse1l + loopl)

            # 10. Checkpoint save.
            checkpoint = nfolder.joinpath('checkpoint.json')
            TButil.checkpoint_out(self.log, checkpoint, loop_data)

    return kase
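# TButil.checkpoint_in / TButil.checkpoint_out act as the resume mechanism for
# single_execute: loop groups that already wrote a checkpoint.json are reloaded
# instead of recomputed. For readers unfamiliar with that pattern, here is a
# minimal sketch of what such a pair of helpers could look like, assuming plain
# JSON files on disk; this is a hypothetical stand-in, not TButil's actual
# implementation.
import json
import logging
from pathlib import Path
from typing import Dict, Optional


def checkpoint_in(log: logging.Logger, checkpoint: Path) -> Optional[Dict]:
    """Return previously stored data if the checkpoint file exists, else None."""
    if not checkpoint.is_file():
        return None
    log.debug(f'Checkpoint found at {checkpoint}; reloading.')
    with checkpoint.open() as fd:
        return json.load(fd)


def checkpoint_out(log: logging.Logger, checkpoint: Path, data: Dict) -> None:
    """Persist data so that an interrupted run can skip completed work."""
    checkpoint.parent.mkdir(parents=True, exist_ok=True)
    log.debug(f'Writing checkpoint at {checkpoint}.')
    with checkpoint.open('w') as fd:
        json.dump(data, fd)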