def add_hydrogens(config, path, inp, out, dummy_dir=None):
    # Initialize ('dummy_dir' is accepted for call-site compatibility with
    # modelling() below, but is not used here)
    from SBI.structure import PDB
    import shutil
    src_path = config.get('Paths', 'modppi_path')
    hbplus = config.get('Paths', 'hbplus_path')
    reduce_exe = config.get('Paths', 'reduce_path')
    reduce_db = config.get('Paths', 'reduce_db_path')
    relax_exe = config.get('Paths', 'relax_exe')
    hydrogen_type = config.get('Parameters', 'hydrogens')
    relax = config.get('Parameters', 'relax')
    cwd = os.getcwd()
    os.chdir(path)
    if fileExist(inp):
        # Name the hydrogen-added intermediate after the input file
        if '.' in inp:
            output_hbplus = ".".join(inp.split('.')[:-1]) + ".h"
        else:
            output_hbplus = inp.strip() + ".h"
        if hydrogen_type == "full":
            os.system("%s -Quiet %s -DB %s > %s" % (reduce_exe, inp, reduce_db, output_hbplus))
        else:
            os.system("%s -o %s >& hbplus.log" % (hbplus, inp))
        if relax == "yes":
            sys.stdout.write("\t\t\t-- Relaxing the hydrogen-intermediate model %s (see Rosetta output in relax.log and score.sc)...\n" % output_hbplus)
            os.system("%s -s %s -in:file:fullatom -nstruct 1 -packing:repack_only >& relax.log" % (relax_exe, output_hbplus))
            opt_model = ".".join(output_hbplus.split('.')[:-1]) + "_0001.pdb"
            old_model = ".".join(output_hbplus.split('.')[:-1]) + "_non_optimized.pdb"
            shutil.move(output_hbplus, old_model)
            if fileExist(opt_model):
                check_pdb = PDB(opt_model)
                if check_pdb.has_protein:
                    check_pdb.clean()
                    check_pdb.write(output_hbplus)
                    try:
                        os.remove(opt_model)
                    except OSError:
                        sys.stdout.write("\t\t\t-- Keeping old file %s ...\n" % opt_model)
                else:
                    shutil.copy(old_model, output_hbplus)
            else:
                shutil.copy(old_model, output_hbplus)
        if not fileExist(output_hbplus):
            raise ValueError("Cannot find file with hydrogen atoms")
        else:
            pdb = PDB(output_hbplus)
            pdb.clean()
            pdb.write(out, force=True)
    os.chdir(cwd)
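# A minimal usage sketch for add_hydrogens (not part of the original module).
# It assumes a ConfigParser object exposing the [Paths] and [Parameters]
# options read above; 'modppi.cfg', './work' and the PDB file names are
# hypothetical:
#
#   import ConfigParser
#   config = ConfigParser.ConfigParser()
#   config.read('modppi.cfg')
#   add_hydrogens(config, './work', 'model.pdb', 'model.h.pdb')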
def main():
    # Initialize
    options = parse_user_arguments()
    verbose = options.show
    pdb_path = os.path.join(config.get('Paths', 'modppi_path'), config.get('Paths', 'pdb_path'))
    dummy_dir = options.dummy_dir
    try:
        did_path = os.path.join(config.get('Paths', 'modppi_path'), config.get('Paths', '3did_path'))
        data_path = os.path.join(config.get('Paths', 'modppi_path'), config.get('Paths', 'data_path'))
    except Exception:
        did_path = options.outdir
        data_path = options.outdir
    if not os.path.exists(did_path):
        os.makedirs(did_path)
    if not os.path.exists(dummy_dir):
        os.makedirs(dummy_dir)
    if not os.path.exists(data_path):
        sys.stderr.write("No DATA directory, please check your installation or INPUT\n")

    # Parse the 3did flat file
    did = parse_3did(options)

    # Create PDB files of the 3did interactions
    for dd, cases in did.iteritems():
        for label in xrange(0, len(cases)):
            # Define the name of the PDB output file with domain-domain interactions
            did_file = os.path.join(did_path, dd[0] + ":" + dd[1] + "#" + str(label) + ".brk.gz")
            if not os.path.exists(did_file.lower()):
                did_file = os.path.join(did_path, dd[0] + ":" + dd[1] + "#" + str(label) + ".brk")
                if not os.path.exists(did_file.lower()):
                    if verbose:
                        sys.stderr.write("\t\t--Create %s\n" % (did_file.lower()))
                    pdb_code, d1, d2 = cases[label]
                    pdb_file = os.path.join(pdb_path, pdb_code[1:3].lower(), "pdb" + pdb_code + ".ent")
                    if not os.path.exists(pdb_file):
                        pdb_file = os.path.join(pdb_path, pdb_code[1:3].lower(), "pdb" + pdb_code + ".ent.gz")
                    if not os.path.exists(pdb_file):
                        if verbose:
                            sys.stderr.write("\t\t\t-- %s not found\n" % pdb_file)
                        continue
                    try:
                        pdb = PDB(pdb_file)
                        brk = PDB()
                        # Extract the two interacting domains and rename their chains to A and B
                        pdb_chain_A = pdb.get_chain_by_id(d1[0])
                        start_A, end_A = d1[1], d1[2]
                        pdb_chain_B = pdb.get_chain_by_id(d2[0])
                        start_B, end_B = d2[1], d2[2]
                        brk_chain_A = pdb_chain_A.extract(init=start_A, end=end_A)
                        brk_chain_A.chain = "A"
                        brk.add_chain(brk_chain_A)
                        brk_chain_B = pdb_chain_B.extract(init=start_B, end=end_B)
                        brk_chain_B.chain = "B"
                        brk.add_chain(brk_chain_B)
                        brk.clean()
                        brk.write(did_file.lower())
                    except Exception as e:
                        if verbose:
                            sys.stderr.write("\t\t\t Error: %s\n" % e)
                        continue

    # Create the list of interactions and the FASTA sequences of 3did
    did_interactions = open(os.path.join(data_path, options.interactions_file), "w")
    did_fasta = open(os.path.join(data_path, options.seq_file), "w")
    for brk in os.listdir(did_path):
        if verbose:
            sys.stderr.write("\t\t-- Reading %s \n" % os.path.join(did_path, brk))
        try:
            pdb = PDB(os.path.join(did_path, brk))
            id_chain = []
            for c in pdb.chain_identifiers:
                pdb_chain = pdb.get_chain_by_id(c)
                id_chain.append(pdb.id + "_" + c)
                printfasta(did_fasta, pdb.id + "_" + c, pdb_chain.gapped_protein_sequence)
            did_interactions.write("%s\t%s\n" % (id_chain[0], id_chain[1]))
        except Exception as e:
            if verbose:
                sys.stderr.write("\t\t-- %s cannot be read\n\t\t Error: %s\n" % (os.path.join(did_path, brk), e))
            continue
    did_interactions.close()
    did_fasta.close()
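# 'printfasta' is called above but not defined in this section. A minimal
# sketch consistent with those calls (the real helper may differ):
def printfasta(out_fd, name, sequence):
    # Write one FASTA record: a '>' header line followed by the sequence.
    out_fd.write(">%s\n%s\n" % (name, sequence))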
        ofile.parent.mkdir(parents=True, exist_ok=True)
        structure, _ = TButil.build_pdb_object(self.log, case.ordered_structures, 2)
        self.log.notice(f'Writing structure {ofile}')
        structure.write(output_file=str(ofile), format='pdb', clean=True,
                        force=TBcore.get_option('system', 'overwrite'))

        if self.motif and binder is not None:
            bfile.parent.mkdir(parents=True, exist_ok=True)
            self.log.notice(f'Writing binder {bfile}')
            fl_binder_ali.write(output_file=str(bfile), format='pdb', clean=True,
                                force=TBcore.get_option('system', 'overwrite'))

        ifile.parent.mkdir(parents=True, exist_ok=True)
        self.log.notice(f'Writing input {ifile}')
        initial.write(output_file=str(ifile), format='pdb', clean=True,
                      force=TBcore.get_option('system', 'overwrite'))
        return case

    def make_structure(self, sse: Dict, pick_aa: Optional[str] = None) -> Case:
        """ """
    # full_structure = [pdb, ]
    binders = []
    for key in case.data['metadata']['binder']:
        binder = case.data['metadata']['binder'][key]
        binders.append(binder)
        binderfile = os.path.dirname(str(pdb_file)) + f'/binder_{key}.pdb'
        binder_chains.extend(binder['auth_asym_id'].drop_duplicates().tolist())
        # full_structure.append(binder)
    log.debug(f'Adding binder chains: {binder_chains}')

    bindersfile = os.path.dirname(str(pdb_file)) + '/binders.pdb'
    log.notice(f'Writing structure {bindersfile}')
    binders = PDB(pd.concat(binders, sort=False))
    binders.write(bindersfile, format='pdb', clean=True,
                  force=TBcore.get_option('system', 'overwrite'))

    full_structure = PDB(pd.concat([pdb[columns], binders[columns]], sort=False))
    log.notice(f'Writing structure {full_file}')
    full_structure.write(str(full_file), format='pdb', clean=True,
                         force=TBcore.get_option('system', 'overwrite'))
    # else:
    #     pdb.write(str(pdb_file), format='pdb', clean=True,
    #               force=TBcore.get_option('system', 'overwrite'))

    # Push back hotspots, motif sequence numbering and binder chains
    if binder_chains == []:
        binder_chains = None
    if res_attach == []:
        res_attach = None
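# NOTE: 'columns' is used above but defined outside this fragment. In this
# codebase it is typically the minimal per-atom coordinate columns (compare
# build_pdb_object below); a plausible definition would be:
#
#   columns = ['auth_comp_id', 'auth_atom_id', 'auth_seq_id',
#              'Cartn_x', 'Cartn_y', 'Cartn_z']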
def modelling(queriesA_original, queriesB_original, queriesA, queriesB,
              hit_items_A, hit_items_B, sections_modeled,
              remaining_sections_A, remaining_sections_B, options):
    # Initialize
    verbose = options.show
    output_dir = options.outdir
    dummy_dir = options.dummy_dir
    hydrogens = options.hbplus
    force_model = options.force
    python_path = config.get('Paths', 'python_path')
    src_path = config.get('Paths', 'modppi_path')
    modeller_path = os.path.join(config.get('Paths', 'modeller_path'))
    modpy_path = os.path.join(src_path, config.get('Paths', 'functions_path'), "modpy")
    numMod = options.nmodels
    renumerate = options.renumerate

    # Use the PID (plus a random suffix) for the dummy modelling folder to avoid overwriting files
    modelling_dummy_name = 'modelling_' + str(os.getpid()) + str(random.randint(0, os.getpid()))
    make_subdirs(dummy_dir, subdirs=[modelling_dummy_name])
    modelling_dir = os.path.join(dummy_dir, modelling_dummy_name)

    # Get items from the hits
    query_A_orig = queriesA_original.get(hit_items_A[0])
    query_B_orig = queriesB_original.get(hit_items_B[0])
    query_A = queriesA.get(hit_items_A[0]).get_sequence()
    query_B = queriesB.get(hit_items_B[0]).get_sequence()
    query_name_A = hit_items_A[0]
    query_name_B = hit_items_B[0]
    query_id_A = query_name_A.split(':')[0]
    query_start = hit_items_A[4][0]
    query_end = int(hit_items_A[4][-1]) + int(hit_items_B[4][-1])
    template_name_A_chain = hit_items_A[1]
    template_name_B_chain = hit_items_B[1]
    template_chain_A_chain = template_name_A_chain.split('_')[-1]
    template_chain_B_chain = template_name_B_chain.split('_')[-1]
    template_A_chain_start = hit_items_A[5][0]
    template_B_chain_start = hit_items_B[5][0]
    template_id_A = "_".join(template_name_A_chain.split('_')[:-1])
    template_id_B = "_".join(template_name_B_chain.split('_')[:-1])
    sequences_complex = {}
    sequences_complex.setdefault("A", query_A_orig)
    sequences_complex.setdefault("B", query_B_orig)

    # Get the positions of the current section
    extension_threshold = int(config.get('Parameters', 'extension_threshold'))
    current_A_section = [hit_items_A[4][0], hit_items_A[4][-1]]
    current_B_section = [hit_items_B[4][0], hit_items_B[4][-1]]
    current_sections = [current_A_section, current_B_section]
    current_interaction = '%s::%s' % (query_name_A, query_name_B)

    # Initialize the 'sections_modeled' dictionary
    if not sections_modeled.get(current_interaction):
        section_group = sections_modeled.setdefault(current_interaction, [])
        section_group.append(current_sections)

    # Check whether the segments of the current interaction belong to a previous group
    for section_pair in sections_modeled.get(current_interaction):
        # The segments must be within a given interval
        if (section_pair[0][0] - extension_threshold <= current_sections[0][0] <= section_pair[0][0] + extension_threshold
                and section_pair[0][1] - extension_threshold <= current_sections[0][1] <= section_pair[0][1] + extension_threshold
                and section_pair[1][0] - extension_threshold <= current_sections[1][0] <= section_pair[1][0] + extension_threshold
                and section_pair[1][1] - extension_threshold <= current_sections[1][1] <= section_pair[1][1] + extension_threshold):
            current_sections = section_pair
            break
    else:
        # If the segments are not within the interval, create a new group
        section_group = sections_modeled.setdefault(current_interaction, [])
        section_group.append(current_sections)

    # Get the sections that have not been used in the alignment
    query_A_fragment_used = hit_items_A[2].replace('-', '')
    query_B_fragment_used = hit_items_B[2].replace('-', '')
    remaining_terminus_A = query_A.split(query_A_fragment_used)
    remaining_terminus_B = query_B.split(query_B_fragment_used)
    Nterminus_name_A = '%s_1-%s' % (query_name_A, hit_items_A[4][0] - 1)
    Cterminus_name_A = '%s_%s-%s' % (query_name_A, hit_items_A[4][-1] + 1, len(query_A))
    Nterminus_name_B = '%s_1-%s' % (query_name_B, hit_items_B[4][0] - 1)
    Cterminus_name_B = '%s_%s-%s' % (query_name_B, hit_items_B[4][-1] + 1, len(query_B))
    # If there are remaining sections, store them in the dictionary
    if hit_items_A[4][0] > 1:
        remaining_sections_A[Nterminus_name_A] = ProteinSequence(Nterminus_name_A, remaining_terminus_A[0])
    if hit_items_A[4][-1] < len(query_A):
        remaining_sections_A[Cterminus_name_A] = ProteinSequence(Cterminus_name_A, remaining_terminus_A[-1])
    if hit_items_B[4][0] > 1:
        remaining_sections_B[Nterminus_name_B] = ProteinSequence(Nterminus_name_B, remaining_terminus_B[0])
    if hit_items_B[4][-1] < len(query_B):
        remaining_sections_B[Cterminus_name_B] = ProteinSequence(Cterminus_name_B, remaining_terminus_B[-1])

    # Create a LOG for tests
    if verbose:
        dummy_log_file = "%s/%s.log" % (modelling_dir, template_id_A)
        dummy_log = open(dummy_log_file, "a")

    # Create the PDB file
    if verbose:
        sys.stdout.write('\t\t-- Using templates %s and %s...\n' % (template_name_A_chain, template_name_B_chain))
    pdb_name = template_id_A
    dummy_pdb_file = '%s/%s.pdb' % (modelling_dir, pdb_name.replace(":", "-"))
    # Initialize the PDB object
    pdb_obj = PDB()
    # Check for the template among the PDB files
    src_path = config.get('Paths', 'modppi_path')
    pdb_path = os.path.join(src_path, config.get('Paths', 'pdb_path'), template_id_A[1:3].lower())
    pdb_file = os.path.join(pdb_path, 'pdb' + template_id_A.lower() + '.ent')
    if not os.path.exists(pdb_file):
        sys.stderr.write('WARNING: PDB file %s was not found, try compressed\n' % (pdb_file))
        pdb_file = os.path.join(pdb_path, 'pdb' + template_id_A.lower() + '.ent.gz')
    # Now check for the template among the 3DiD files
    if not os.path.exists(pdb_file):
        sys.stderr.write('WARNING: PDB file %s was not found, try 3DiD ".brk" suffix\n' % (pdb_file))
        pdb_path = os.path.join(src_path, config.get('Paths', '3did_path'))
        pdb_file = os.path.join(pdb_path, template_id_A.lower() + '.brk')
    if not os.path.exists(pdb_file):
        sys.stderr.write('WARNING: PDB file %s was not found, try 3DiD ".brk" suffix compressed\n' % (pdb_file))
        pdb_file = os.path.join(pdb_path, template_id_A.lower() + '.brk.gz')
    # If the PDB file is not found in the database, skip to the next interaction
    if not os.path.exists(pdb_file):
        sys.stderr.write('WARNING: PDB file %s was not found\n' % (pdb_file))
        raise ModelException
    pdb_chain_obj = PDB(pdb_file)
    pdb_chain_obj.clean()
    # Add only the chains present in the alignment
    pdb_obj.add_chain(pdb_chain_obj.get_chain_by_id(template_chain_A_chain))
    pdb_obj.add_chain(pdb_chain_obj.get_chain_by_id(template_chain_B_chain))
    # Get the sequences from the PDB, where 'x' are gaps and 'X' are heteroatoms
    pdb_seqA = pdb_obj.chains[0].gapped_protein_sequence.replace('x', '-').replace('X', '.')
    pdb_seqB = pdb_obj.chains[1].gapped_protein_sequence.replace('x', '-').replace('X', '.')
    # Create the dummy PDB file
    pdb_obj.clean()
    pdb_obj.write(output_file=dummy_pdb_file, force=True)

    # Check contacts
    PPI_threshold_type = config.get('Parameters', 'PPI_threshold_type')
    PPI_distance_threshold = float(config.get('Parameters', 'PPI_distance_threshold'))
    protein_complex = Complex(pdb_obj, PPI_type=PPI_threshold_type, PPI_distance=PPI_distance_threshold)
    # If the proteins do not form a complex, skip the modelling
    if len(protein_complex.PPInterfaces[0].contacts) == 0:
        sys.stderr.write('WARNING: No interaction between %s and %s (for %s %s)\n'
                         % (template_name_A_chain, template_name_B_chain, query_name_A, query_name_B))
        remove_files([dummy_pdb_file])
        raise ModelException
    else:
        if verbose:
            sys.stdout.write('\t\t\t-- Accepted interaction between %s and %s (for %s %s)...\n'
                             % (template_name_A_chain, template_name_B_chain, query_name_A, query_name_B))

    # Correct possible discrepancies between the template sequence found in the FASTA
    # file of the nodes in the PIN and the sequence found in the PDB file,
    # e.g. the sequence of a protein can have an 'X' in the FASTA file and an 'M'
    # in the newest version of the PDB file
    template_seqA = hit_items_A[3]
    template_seqA_ungapped = re.sub('-', '', template_seqA)
    pdbA_section = pdb_seqA[hit_items_A[5][0] - 1:hit_items_A[5][-1]]
    for pair in itertools.izip(template_seqA_ungapped, pdbA_section):
        if pair[0] == 'X' or pair[0] == 'x':
            template_seqA = re.sub('[xX]', pair[1], template_seqA, 1)
    template_seqB = hit_items_B[3]
    template_seqB_ungapped = re.sub('-', '', template_seqB)
    pdbB_section = pdb_seqB[hit_items_B[5][0] - 1:hit_items_B[5][-1]]
    for pair in itertools.izip(template_seqB_ungapped, pdbB_section):
        if pair[0] == 'X' or pair[0] == 'x':
            template_seqB = re.sub('[xX]', pair[1], template_seqB, 1)
    if verbose:
        dummy_log.write("Hits_items_A: %s\n" % ([str(x) for x in hit_items_A]))
        dummy_log.write("Hits_items_B: %s\n" % ([str(x) for x in hit_items_B]))
        dummy_log.write("pdbA_section %s\n" % pdbA_section)
        dummy_log.write("pdbB_section %s\n" % pdbB_section)
        dummy_log.write("length PDB A: %d\n" % len(pdb_seqA))
        dummy_log.write("length PDB B: %d\n" % len(pdb_seqB))

    # Add the remaining residues at the beginning or at the end of the template sequences, if needed
    template_seqA = re.sub('[xX]', '-', template_seqA)
    if template_A_chain_start > 1:
        template_A_first_residues = ''.join(pdb_seqA[:hit_items_A[5][0] - 1])
        template_seqA = template_A_first_residues + template_seqA
    if hit_items_A[5][-1] < len(pdb_seqA):
        template_seqA += ''.join(pdb_seqA[hit_items_A[5][-1]:])
    template_seqB = re.sub('[xX]', '-', template_seqB)
    if template_B_chain_start > 1:
        template_B_first_residues = ''.join(pdb_seqB[:hit_items_B[5][0] - 1])
        template_seqB = template_B_first_residues + template_seqB
    if hit_items_B[5][-1] < len(pdb_seqB):
        template_seqB += ''.join(pdb_seqB[hit_items_B[5][-1]:])
    if verbose:
        dummy_log.write("FINAL template_seqA %s\n" % template_seqA)
        dummy_log.write("FINAL template_seqB %s\n" % template_seqB)

    # Add gaps at the beginning of the query sequences, if needed
    gaps_number_A_chain_beginning = 0
    gaps_number_B_chain_beginning = 0
    if template_A_chain_start > 1:
        gaps_number_A_chain_beginning = int(template_A_chain_start) - 1
    if template_B_chain_start > 1:
        gaps_number_B_chain_beginning = int(template_B_chain_start) - 1
    A_chain_query_seq = '-' * gaps_number_A_chain_beginning + re.sub('[xX]', '-', hit_items_A[2])
    B_chain_query_seq = '-' * gaps_number_B_chain_beginning + re.sub('[xX]', '-', hit_items_B[2])
    # Add gaps at the end of the query sequences, if needed
    for pair in itertools.izip_longest(A_chain_query_seq, template_seqA):
        if pair[0] is None:
            A_chain_query_seq += '-'
    for pair in itertools.izip_longest(B_chain_query_seq, template_seqB):
        if pair[0] is None:
            B_chain_query_seq += '-'

    # Create the PIR alignment
    query_whole_seq = A_chain_query_seq + '/' + B_chain_query_seq + '*'
    template_whole_seq = template_seqA + '/' + template_seqB + '*'
    header1 = '>P1;%s\nsequence:%s:%s:.:%s:.:.:.:.:.' % (query_id_A, query_id_A, query_start, query_end)
    header2 = '>P1;%s\nstructureX:%s:1:%s:.:%s:.:.:.:.' % (template_id_A.replace(":", "-"), template_id_A.replace(":", "-"), template_chain_A_chain, template_chain_B_chain)
    lines = []
    lines.append(header1)
    lines.extend([query_whole_seq[i:i + 60] for i in range(0, len(query_whole_seq), 60)])
    lines.append(header2)
    lines.extend([template_whole_seq[i:i + 60] for i in range(0, len(template_whole_seq), 60)])
    pir_file = open('%s/alignment.pir' % (modelling_dir), 'w+')
    for line in lines:
        pir_file.write('%s\n' % (line))
    pir_file.close()

    # Model
    # Create a folder for the models of each type of interaction
    if '-' in query_name_A:
        query_name_A = query_name_A.rsplit('_', 1)[0]
    if '-' in query_name_B:
        query_name_B = query_name_B.rsplit('_', 1)[0]
    interaction_dir = os.path.join(output_dir, '%s::%s' % (query_name_A, query_name_B))
    if not os.path.exists(interaction_dir):
        make_subdirs(output_dir, subdirs=['./%s::%s' % (query_name_A, query_name_B)])

    # If the models do not yet exist, proceed and add them to the list of MODELS
    do_model = False
    model_path = os.path.abspath(interaction_dir)
    for imodel in xrange(1, numMod + 1):
        model_name = '%s_%s_%d-%d::%s_%s_%d-%d#%d.pdb' % (template_id_A, template_chain_A_chain, current_sections[0][0], current_sections[0][1], template_id_B, template_chain_B_chain, current_sections[1][0], current_sections[1][1], imodel)
        model_path_model = os.path.join(model_path, model_name)
        # print "CHECK %s %s\n" % (do_model, model_path_model)
        with open(interaction_dir + '/%s.list' % (current_interaction), 'a+') as paths_to_models_file:
            # Rewind before reading: in 'a+' mode the position starts at the end of the file
            paths_to_models_file.seek(0)
            if model_path_model not in paths_to_models_file.read():
                paths_to_models_file.write(model_path_model + '\n')
        if not do_model and not fileExist(model_path_model):
            do_model = True

    # Complete the set of models
    if do_model or force_model:
        # Keep the current working directory, then change to the modelling folder
        cwd = os.getcwd()
        os.chdir(modelling_dir)
        try:
            cmd = [os.path.join(modeller_path, 'modpy.sh'),
                   os.path.join(python_path, 'python'),
                   os.path.join(modpy_path, 'simpleModel.py'),
                   '--pir=./alignment.pir',
                   '--out=%s-%s' % (template_name_A_chain, template_name_B_chain),
                   '--models=%d' % (numMod)]
            if options.optimize:
                cmd.append('--optimize')
            subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        except Exception as e:
            sys.stderr.write("ERROR: %s\n" % (e))
            sys.stderr.write("LOCATION: %s\n" % modelling_dir)
            if verbose:
                os.system("grep get_ran %s" % (template_name_A_chain + "-" + template_name_B_chain + ".log"))
                sys.stderr.write("\t\tSkip models with template %s\n" % (model_name))
            os.chdir(cwd)
            raise ModelException

        # Clean and rename all models
        for imodel in xrange(1, numMod + 1):
            label_model = 99990000 + imodel
            input_model = '%s.B%s.pdb' % (query_id_A, str(label_model))
            model_name = '%s_%s_%d-%d::%s_%s_%d-%d#%d.pdb' % (template_id_A, template_chain_A_chain, current_sections[0][0], current_sections[0][1], template_id_B, template_chain_B_chain, current_sections[1][0], current_sections[1][1], imodel)
            model_path_model = os.path.join(model_path, model_name)
            if fileExist(os.path.abspath('%s' % (input_model))):
                # Check contacts
                check_pdb_obj = PDB(os.path.abspath('%s' % (input_model)))
                PPI_threshold_type = config.get('Parameters', 'PPI_threshold_type')
                PPI_distance_threshold = float(config.get('Parameters', 'PPI_distance_threshold'))
                check_protein_complex = Complex(check_pdb_obj, PPI_type=PPI_threshold_type, PPI_distance=PPI_distance_threshold)
                if len(check_protein_complex.PPInterfaces[0].contacts) == 0:
                    if verbose:
                        sys.stdout.write("\t\t\t-- Skip model without contacts %s\n" % model_name)
                    continue
                else:
                    if verbose:
                        sys.stdout.write("\t\t\t-- Accepted model %s\n" % model_name)
                if hydrogens:
                    if verbose:
                        sys.stdout.write("\t\t\t-- Adding hydrogens and relaxing the model %s\n" % model_name)
                    output_model = model_name
                    try:
                        add_hydrogens(config, os.path.abspath("./"), input_model, output_model, dummy_dir)
                    except ValueError as e:
                        sys.stderr.write("WARNING %s\n" % e)
                        os.rename(input_model, output_model)
                else:
                    output_model = model_name
                    os.rename(input_model, output_model)
                if renumerate:
                    if verbose:
                        sys.stdout.write("\t\t\t-- Renumbering residues as in the original sequence\n")
                    output_model_renumber = model_name + ".re"
                    try:
                        pdb_renumber = renumber_pdb(config, os.path.abspath("./"), output_model, sequences_complex, os.path.abspath("./"))
                        pdb_renumber.write(output_model_renumber)
                        os.rename(output_model_renumber, output_model)
                    except Exception as e:
                        sys.stderr.write("WARNING %s\n" % e)
                shutil.copy(output_model, model_path_model)
        os.chdir(cwd)

    # Remove the dummy modelling folder
    try:
        shutil.rmtree(modelling_dir)
    except Exception as e:
        sys.stderr.write("WARNING first attempt to remove folder %s\n" % e)
        try:
            os.system("\\rm -r %s" % (modelling_dir))
        except Exception as ee:
            sys.stderr.write("WARNING last attempt %s\n" % ee)

    return sections_modeled, remaining_sections_A, remaining_sections_B
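# For reference, the 'alignment.pir' file written by 'modelling' has the shape
# below (identifiers and sequences are hypothetical). Each entry starts with a
# '>P1;' header, the two chains are joined with '/', every entry ends with '*',
# and sequence lines are wrapped at 60 characters:
#
#   >P1;QUERY1
#   sequence:QUERY1:1:.:35:.:.:.:.:.
#   MKTAYIAKQR-GQISFVKSHF/MVLSPADKTNVKAAW*
#   >P1;1abc
#   structureX:1abc:1:A:.:B:.:.:.:.
#   MKTAYIAKQRQGQISFVKSHF/MVLSPADKTNVKAAW*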
def build_pdb_object( log: Logger,
                      sses: List[Dict],
                      loops: Union[List[int], int],
                      concat: Optional[bool] = True,
                      outfile: Optional[Union[str, Path]] = None
                      ) -> Tuple[Frame3D, List[int]]:
    """Make the parametrically built atoms in a :class:`.Case` into a PDB file.

    :param log: Job logger.
    :param sses: List of the secondary structures to build. Each SSE dictionary
        must contain the ``metadata.atoms`` keys, already in the final expected
        position.
    :param loops: Number of residues between SSEs. It can be one less than the
        number of structures (assuming no N- or C-terminal residues) or one
        more (assuming both N- and C-terminal residues).
    :param concat: When :data:`True`, return the full structure as a single
        object; otherwise return a list of the individual parts.
    :param outfile: If provided, write the structure to file.
    """
    if isinstance(loops, int):
        loops = [loops, ] * (len(sses) - 1)

    if len(loops) != len(sses) - 1:
        raise ValueError('Number of loops should equal number of SSEs minus one.')

    pieces = []
    columns = ['auth_comp_id', 'auth_atom_id', 'auth_seq_id',
               'Cartn_x', 'Cartn_y', 'Cartn_z']
    start = 1 if len(loops) < len(sses) else loops.pop(0)
    log.debug(f'starting numbering with: {start}')
    for i, sse in enumerate(sses):
        start = start if i == 0 else int(sses[i - 1]['length']) + loops[i - 1] + start
        pdb_numbering = pd.DataFrame(sse['metadata']['atoms'], columns=columns)['auth_seq_id'].values
        try:
            structure = PDB(pd.DataFrame(sse['metadata']['atoms'], columns=columns)).renumber(start)
        except Exception:
            # Fall back to manual renumbering if renumber() is unavailable or fails
            structure = PDB(pd.DataFrame(sse['metadata']['atoms'], columns=columns))
            structure['auth_seq_id'] += (start - structure['auth_seq_id'].values[0])
        structure = structure.assign(sse_id=[sse["id"]] * len(structure), pdb_num=pdb_numbering)
        pieces.append(structure)

    structure = pd.concat(pieces, sort=False).reset_index()
    structure['id'] = list(range(1, structure.shape[0] + 1))

    if outfile is not None:
        structure.write(output_file=str(outfile), format='pdb', clean=True,
                        force=TBcore.get_option('system', 'overwrite'))

    if not concat:
        return pieces
    return structure, [int(p.iloc[-1]['auth_seq_id']) for p in pieces]
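# Hedged usage sketch (the SSE dicts are assumed to provide 'metadata.atoms',
# as documented above); it mirrors the call made in the writer step above,
# TButil.build_pdb_object(self.log, case.ordered_structures, 2):
#
#   structure, sse_ends = build_pdb_object(
#       log, case.ordered_structures, loops=2, outfile=Path('structure.pdb'))
#   log.debug(f'C-terminal residue of each SSE: {sse_ends}')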