def restore_chain_identifiers(pdb_file, chains_dict, full_residue_mapping):
    """Write a copy of a structure with original chain ids and residue numbers.

    Args:
        pdb_file: Path of the structure file handed to
            ``pctools.parse_any_structure``.
        chains_dict: Mapping ``{original_chain_id: current_chain_id}``. Every
            chain whose id equals ``current_chain_id`` is rebuilt under
            ``original_chain_id``.
        full_residue_mapping: Mapping keyed by *current* chain id. Each value
            is either a dict (``OrderedDict`` included) translating the
            current residue number into the restored one, or an ``int``
            offset added to the current residue number.

    Returns:
        Path of the written ``<name>_CHOIR_CorrectedChains.pdb`` file inside
        ``workdir``.
    """
    pname, structure, _ = pctools.parse_any_structure(pdb_file)
    restored_chains_file = os.path.join(
        workdir, pname + "_CHOIR_CorrectedChains.pdb")
    chains = bpp.Selection.unfold_entities(structure, 'C')
    new_structure = bpp.Structure.Structure(structure.id)
    new_model = bpp.Model.Model(0)
    for original, current in chains_dict.items():
        for chain in chains:
            if chain.id != current:
                continue
            new_chain = bpp.Chain.Chain(current)
            new_chain.id = original
            for residue in chain:
                # Looked up per residue so that a missing mapping is only an
                # error for chains that actually contain residues.
                mapping = full_residue_mapping[current]
                new_residue = bpp.Residue.Residue(
                    residue.id, residue.get_resname(), residue.get_segid())
                # isinstance() instead of an exact type() match, so a plain
                # dict or any dict/int subclass is honoured as well.
                if isinstance(mapping, dict):
                    new_number = mapping[residue.id[1]]
                elif isinstance(mapping, int):
                    new_number = mapping + residue.id[1]
                else:
                    # Unsupported mapping type: as before, the residue is
                    # added without atoms and with its id left untouched.
                    new_chain.add(new_residue)
                    continue
                for atom in residue:
                    new_residue.add(atom)
                # The hetero flag and insertion code are reset to blanks.
                new_residue.id = (' ', new_number, ' ')
                new_chain.add(new_residue)
            new_model.add(new_chain)
    new_structure.add(new_model)
    io.set_structure(new_structure)
    io.save(restored_chains_file)
    return restored_chains_file
def make_local_template(best_oligo_template):
    """Write a cleaned local copy of the selected template structure.

    When ``g_args.allow_monomers`` is set the template is read from
    ``pdb_archive`` (``pdb<code>.ent.gz``) and rejected if
    ``pctools.is_nmr`` flags it; otherwise it is read from
    ``pdb_homo_archive`` (``<code>.pdb.gz``). The parsed structure is saved
    through the ``pctools.SelectIfCA`` selector.

    Args:
        best_oligo_template: Template code. Characters 1-2 select the archive
            sub-directory.

    Returns:
        Path of the written ``<code>_CHOIR_CleanTemplate.pdb`` file inside
        ``workdir``.

    Raises:
        RuntimeError: If monomers are allowed and the template is an NMR
            structure.
    """
    middle_letters_best = best_oligo_template[1:3]
    if g_args.allow_monomers:
        best_template_file = os.path.join(
            pdb_archive, middle_letters_best,
            'pdb' + best_oligo_template + ".ent.gz")
        pdb_name, contents = pctools.parse_pdb_contents(best_template_file)
        if pctools.is_nmr(contents):
            print(clrs['r'] + '\n\n Selected template ' +
                  best_oligo_template +
                  ' is an NMR structure \n Will try a a different candidate.\n\n'
                  + clrs['n'])
            # The original used a bare `raise`, which with no active exception
            # surfaces as an uninformative RuntimeError. Raise the same type
            # explicitly, with a message naming the rejected template.
            raise RuntimeError('Selected template ' + best_oligo_template +
                               ' is an NMR structure')
    else:
        best_template_file = os.path.join(
            pdb_homo_archive, middle_letters_best,
            best_oligo_template + ".pdb.gz")
    clean_template_file = os.path.join(
        workdir, best_oligo_template + "_CHOIR_CleanTemplate.pdb")
    pdb_name, structure, nchains = pctools.parse_any_structure(
        best_template_file)
    io.set_structure(structure)
    io.save(clean_template_file, pctools.SelectIfCA())
    return clean_template_file
def extract_relevant_chains(pdb_file, relevant_chains):
    """Save a copy of a structure filtered by ``pctools.SelectChains``.

    Args:
        pdb_file: Path of the input structure. The part of its basename that
            precedes ``_CHOIR_`` is reused as the output name prefix.
        relevant_chains: Value passed straight to ``pctools.SelectChains``
            (presumably the chain ids to keep - confirm against pctools).

    Returns:
        Path of the written ``<prefix>_CHOIR_RelevantChains.pdb`` file inside
        ``workdir``.
    """
    template_name = os.path.basename(pdb_file).split('_CHOIR_')[0]
    # Only the parsed structure is needed; the previous version also unfolded
    # the chains into a list that was never used.
    _, structure, _ = pctools.parse_any_structure(pdb_file)
    relevant_chains_file = os.path.join(
        workdir, template_name + "_CHOIR_RelevantChains.pdb")
    io.set_structure(structure)
    io.save(relevant_chains_file, pctools.SelectChains(relevant_chains))
    return relevant_chains_file
def rename_relevant_chains(pdb_file):
    """Relabel the chains of a structure sequentially and save the result.

    The n-th chain (counting from 1) receives the id ``numalpha[str(n)]``.

    Args:
        pdb_file: Path of the input structure. The part of its basename that
            precedes ``_CHOIR_`` is reused as the output name prefix.

    Returns:
        Tuple ``(renamed_chains_file, chains_dict)`` where ``chains_dict``
        maps each original chain id to its newly assigned id.
    """
    prefix = os.path.basename(pdb_file).split('_CHOIR_')[0]
    _pname, structure, _nchains = pctools.parse_any_structure(pdb_file)
    renamed_chains_file = os.path.join(
        workdir, prefix + "_CHOIR_RenamedChainsTemplate.pdb")
    all_chains = bpp.Selection.unfold_entities(structure, 'C')

    chains_dict = {}
    # First pass: assign a temporary 'X'-prefixed id. Presumably this keeps a
    # new id from colliding with a sibling chain that still carries the same
    # letter - confirm against Biopython's id setter.
    for position, chain in enumerate(all_chains, start=1):
        previous_id = chain.id
        assigned = numalpha[str(position)]
        chain.id = 'X' + assigned
        chains_dict[previous_id] = assigned

    # Second pass: drop the temporary prefix, keeping only the character
    # at index 1.
    for chain in all_chains:
        chain.id = chain.id[1]

    io.set_structure(structure)
    io.save(renamed_chains_file)
    return renamed_chains_file, chains_dict
def analyse_model(oligomer):
    """Assess one modelled oligomer and collect the results in a report dict.

    Depending on the letters present in ``g_args.assessment`` the model is
    scored on interfaces/surface ('I', skipped when
    ``g_args.allow_monomers`` is set), compared with the template through
    ``pctools.run_gesamt`` ('G') and checked with
    ``pctools.run_molprobity`` ('M'). The partial scores are averaged into
    ``protchoir_score``, and the report is pickled to
    ``<model_oligomer_name>_CHOIR_model_report.pickle`` in the current
    working directory.

    Reads the module-level names ``g_report``, ``g_args``, ``g_entropies``,
    ``g_z_entropies``, ``g_tmdata``, ``g_minx``, ``g_maxx``,
    ``g_interfaces_dict``, ``g_template_hitchain``, ``template``,
    ``template_file``, ``template_molprobity`` and ``clrs``.

    Args:
        oligomer: Path of the model structure file.

    Returns:
        Tuple ``(model_report, text)`` where ``text`` is the accumulated log
        joined with newlines.
    """
    output = []
    model_report = g_report.copy()
    model_report['model_filename'] = oligomer
    model_oligomer_name = os.path.basename(oligomer).split(
        "_CHOIR_")[0].replace('.', '_')
    output.append(pctools.subsection('3', model_oligomer_name))
    output.append('Analysing oligomer file: ' + clrs['y'] + oligomer +
                  clrs['n'] + '\n')
    model_report['model_oligomer_name'] = model_oligomer_name

    # Interface assessment is requested with 'I' but never done for monomers.
    interfaces_assessed = ('I' in g_args.assessment
                           and not g_args.allow_monomers)
    conservation_assessed = (g_args.sequence_mode is False
                             and g_args.skip_conservation is False)

    if g_args.generate_report is True:
        model_report['model_figures'], pymol_output = pctools.pymol_screenshot(
            oligomer, g_args, putty=True)
        output.append(pymol_output)

    pdb_name, structure, nchains = pctools.parse_any_structure(oligomer)
    nchains, seqs, chain_ids = pctools.extract_seqs(structure, 0)
    # First element of every extracted sequence record (presumably the chain
    # id - confirm against pctools.extract_seqs).
    relevant_chains = [seq[0] for seq in seqs]
    pisa_output, pisa_error, protomer_data = pctools.run_pisa(
        oligomer,
        '',
        g_args.verbosity,
        gen_monomer_data=True,
        gen_oligomer_data=True)
    protomer_surface_residues = pctools.get_areas(protomer_data)
    (model_report['assemblied_protomer_plot'],
     model_report['assemblied_protomer_exposed_area'],
     model_report['assemblied_protomer_hydrophobic_area'],
     model_report['assemblied_protomer_conserved_area'],
     _minx, _maxx, analysis_output) = pctools.plot_analysis(
         pdb_name,
         protomer_surface_residues,
         g_entropies,
         g_z_entropies,
         g_tmdata,
         g_args,
         minx=g_minx,
         maxx=g_maxx)
    output.append(analysis_output)

    if interfaces_assessed:
        output.append(
            pctools.subsection('3' + '[I]',
                               'Interfaces Comparison: ' + model_oligomer_name))
        if conservation_assessed:
            # Percent change of each surface class between the isolated
            # protomer and the protomer inside the assembly.
            model_report['exposed_area_reduction'] = _percent_area_change(
                model_report['assemblied_protomer_exposed_area'],
                model_report['protomer_exposed_area'])
            model_report['hydrophobic_area_reduction'] = _percent_area_change(
                model_report['assemblied_protomer_hydrophobic_area'],
                model_report['protomer_hydrophobic_area'])
            model_report['conserved_area_reduction'] = _percent_area_change(
                model_report['assemblied_protomer_conserved_area'],
                model_report['protomer_conserved_area'])
            if model_report['exposed_area_reduction'] < -5:
                hydrophobic_surface_score = _surface_component_score(
                    model_report['hydrophobic_area_reduction'],
                    model_report['exposed_area_reduction'])
                output.append('Hydrophobic surface score: ' +
                              str(hydrophobic_surface_score))
                conserved_surface_score = _surface_component_score(
                    model_report['conserved_area_reduction'],
                    model_report['exposed_area_reduction'])
                output.append('Conserved surface score: ' +
                              str(conserved_surface_score))
                model_report['surface_score'] = round(
                    (hydrophobic_surface_score + conserved_surface_score) / 2,
                    2)
            else:
                output.append(clrs['r'] + 'Exposed area reduction too small.' +
                              clrs['n'])
                model_report['surface_score'] = 0
            output.append('Final surface score: ' +
                          str(model_report['surface_score']))
        else:
            model_report['surface_score'] = 'NA'

        model_oligomer = oligomer.split('_CHOIR_CorrectedChains')[0]
        # Interfaces XML named after the model (presumably produced by the
        # run_pisa call above - confirm).
        xml_out = model_oligomer + '_CHOIR_PisaInterfaces.xml'
        model_interfaces_list, interfaces_output = pctools.parse_interfaces(
            xml_out, relevant_chains, g_args.verbosity)
        template_interfaces_list = g_interfaces_dict[g_template_hitchain]

        if model_interfaces_list and template_interfaces_list:
            if g_args.verbosity > 0:
                output.append(clrs['y'] + 'MODEL INTERFACES' + clrs['n'])
                for model_interface in model_interfaces_list:
                    output.append(clrs['y'] +
                                  ' <> '.join(model_interface['chains']) +
                                  clrs['n'])
                    output.append(clrs['y'] + 'Interface Area: ' + clrs['n'] +
                                  str(model_interface['interface area']) +
                                  ' A^2')
                    output.append(
                        clrs['y'] + 'Interface Solvation Energy: ' +
                        clrs['n'] +
                        str(model_interface['interface solvation energy']) +
                        ' kcal/mol')
                    output.append(clrs['y'] + 'Hydrogen Bonds: ' + clrs['n'] +
                                  str(model_interface['hydrogen bonds']))
                    output.append(clrs['y'] + 'Salt Bridges: ' + clrs['n'] +
                                  str(model_interface['salt bridges']))
                    output.append(clrs['y'] + 'Disulphide Bridges: ' +
                                  clrs['n'] +
                                  str(model_interface['disulphide bridges']) +
                                  "\n\n")

            interfaces_comparison = {}
            for template_interface in template_interfaces_list:
                for model_interface in model_interfaces_list:
                    # Only interfaces formed by the same pair of chains are
                    # compared.
                    if set(model_interface['chains']) != set(
                            template_interface['chains']):
                        continue
                    comparison_data = {}
                    # Sum of the weights used in the score below
                    # (area 1, energy 2, H-bonds 2, salt bridges 3,
                    # disulphides 4).
                    denominator = 12

                    model_area = model_interface['interface area']
                    template_area = template_interface['interface area']
                    delta_area = round(model_area - template_area, 2)
                    comparison_data['model area'] = model_area
                    comparison_data['template area'] = template_area
                    comparison_data['delta area'] = delta_area

                    model_energy = model_interface[
                        'interface solvation energy']
                    template_energy = template_interface[
                        'interface solvation energy']
                    delta_energy = round(model_energy - template_energy, 2)
                    comparison_data['model energy'] = model_energy
                    comparison_data['template energy'] = template_energy
                    comparison_data['delta energy'] = delta_energy

                    model_hb = model_interface['hydrogen bonds']
                    template_hb = template_interface['hydrogen bonds']
                    delta_hb = round(model_hb - template_hb, 2)
                    comparison_data['model hb'] = model_hb
                    comparison_data['template hb'] = template_hb
                    comparison_data['delta hb'] = delta_hb

                    model_sb = model_interface['salt bridges']
                    template_sb = template_interface['salt bridges']
                    delta_sb = round(model_sb - template_sb, 2)
                    comparison_data['model sb'] = model_sb
                    comparison_data['template sb'] = template_sb
                    comparison_data['delta sb'] = delta_sb

                    model_ss = model_interface['disulphide bridges']
                    template_ss = template_interface['disulphide bridges']
                    delta_ss = round(model_ss - template_ss, 2)
                    comparison_data['model ss'] = model_ss
                    comparison_data['template ss'] = template_ss
                    comparison_data['delta ss'] = delta_ss

                    output.append(clrs['y'] + 'INTERFACES COMPARISON' +
                                  clrs['n'])
                    output.append(' <> '.join(model_interface['chains']))

                    # Interface area: equal or larger than the template
                    # counts as 100 %.
                    if delta_area >= 0:
                        emphasis_color = clrs['g']
                        relative_area = 100
                    else:
                        emphasis_color = clrs['r']
                        relative_area = round(
                            model_area * 100 / template_area, 2)
                    output.append('Delta Interface Area: ' + emphasis_color +
                                  str(delta_area) + clrs['n'] + ' A^2 (' +
                                  str(relative_area) + '%)')

                    # Solvation energy: equal or lower than the template
                    # counts as 100 %.
                    if delta_energy <= 0:
                        emphasis_color = clrs['g']
                        relative_energy = 100
                    else:
                        emphasis_color = clrs['r']
                        if model_energy < 0 and template_energy < 0:
                            relative_energy = round(
                                model_energy * 100 / template_energy, 2)
                        elif model_energy < 0 and template_energy > 0:
                            relative_energy = 100
                        else:
                            # Model energy is zero or positive. This also
                            # covers the exact-zero cases that previously
                            # left relative_energy unbound (or stale from the
                            # preceding interface).
                            relative_energy = 0
                    output.append('Delta Interface Solvation Energy: ' +
                                  emphasis_color + str(delta_energy) +
                                  clrs['n'] + ' kcal/mol (' +
                                  str(relative_energy) + '%)')

                    # Bond counts: a bond type absent from both interfaces is
                    # dropped from the weighted average.
                    relative_hb, color_key, informative = _relative_bond_count(
                        model_hb, template_hb, delta_hb)
                    if not informative:
                        denominator -= 2
                    output.append('Delta Hydrogen Bonds: ' + clrs[color_key] +
                                  str(delta_hb) + clrs['n'] + ' (' +
                                  str(relative_hb) + '%)')

                    relative_sb, color_key, informative = _relative_bond_count(
                        model_sb, template_sb, delta_sb)
                    if not informative:
                        denominator -= 3
                    output.append('Delta Salt Bridges: ' + clrs[color_key] +
                                  str(delta_sb) + clrs['n'] + ' (' +
                                  str(relative_sb) + '%)')

                    relative_ss, color_key, informative = _relative_bond_count(
                        model_ss, template_ss, delta_ss)
                    if not informative:
                        denominator -= 4
                    output.append('Delta Disulphide Bridges: ' +
                                  clrs[color_key] + str(delta_ss) + clrs['n'] +
                                  ' (' + str(relative_ss) + '%)\n')

                    if denominator == 0:
                        comparison_data['score'] = 0
                    else:
                        comparison_data['score'] = round(
                            (relative_area + 2 * relative_energy +
                             2 * relative_hb + 3 * relative_sb +
                             4 * relative_ss) / denominator, 2)
                    output.append('Interface score: ' +
                                  str(comparison_data['score']))
                    interfaces_comparison[''.join(
                        sorted(model_interface['chains']))] = comparison_data

            if interfaces_comparison:
                comparison_plots, interfaces_output = plot_deltas(
                    model_oligomer_name, template, interfaces_comparison,
                    g_args)
                model_report['comparison_plots'] = os.path.basename(
                    comparison_plots)
                output.append(interfaces_output)
                summed_score = sum(
                    data['score'] for data in interfaces_comparison.values())
                model_report['interfaces_score'] = round(
                    summed_score / (10 * len(interfaces_comparison)), 2)
                output.append('Final interfaces score: ' +
                              str(model_report['interfaces_score']))
            else:
                # No model interface shares its chain pair with a template
                # interface. Score it like the no-interfaces case below
                # instead of dividing by zero.
                model_report['interfaces_score'] = 0
        else:
            if 'surface_score' not in model_report:
                model_report['surface_score'] = 0
            model_report['interfaces_score'] = 0
    else:
        model_report['surface_score'] = 'NA'
        model_report['interfaces_score'] = 'NA'
        model_report['comparison_plots'] = 'NA'
        model_report['assemblied_protomer_exposed_area'] = 'NA'
        model_report['assemblied_protomer_hydrophobic_area'] = 'NA'
        model_report['assemblied_protomer_conserved_area'] = 'NA'

    if 'G' in g_args.assessment:
        output.append(pctools.subsection('3' + '[G]', 'GESAMT Comparison'))
        qscore, rmsd, fasta_out, gesamt_output = pctools.run_gesamt(
            template, template_file, model_oligomer_name, oligomer, None,
            g_args)
        output.append(gesamt_output)
        model_report['gesamt_qscore'] = str(qscore)
        model_report['gesamt_rmsd'] = str(rmsd)
    else:
        model_report['gesamt_qscore'] = 'NA'
        model_report['gesamt_rmsd'] = 'NA'

    if 'M' in g_args.assessment:
        output.append(pctools.subsection('3' + '[M]', 'Molprobity Comparison'))
        model_molprobity, molprobity_output = pctools.run_molprobity(
            oligomer, g_args)
        output.append(molprobity_output)
        model_report['model_clashscore'] = str(model_molprobity['clashscore'])
        model_report['model_molprobity'] = str(
            model_molprobity['molprobity_score'])
        output.append(clrs['y'] + 'MOLPROBITY COMPARISON' + clrs['n'])
        output.append('Criterion\tTempl.\tModel')
        molprobity_rows = (
            ('Rama. Fav.\t', 'rama_fav'),
            ('Rama. Out.\t', 'rama_out'),
            ('Rot. Out.\t', 'rot_out'),
            ('CBeta Dev.\t', 'cb_dev'),
            ('Clashscore\t', 'clashscore'),
            ('Molprob. Score\t', 'molprobity_score'),
        )
        for label, key in molprobity_rows:
            output.append(label + str(template_molprobity[key]) + '\t' +
                          str(model_molprobity[key]))
        molprobity_radar, radar_output = plot_molprobity(
            model_oligomer_name, model_molprobity, template,
            template_molprobity)
        output.append(radar_output)
        model_report['molprobity_radar'] = molprobity_radar
        delta_clashscore = (model_molprobity['clashscore'] -
                            template_molprobity['clashscore']) / 10
        output.append('Delta clashscore: ' + str(delta_clashscore))
        # The full 10 points are kept unless the model's clashscore exceeds
        # the template's by at least 10; beyond that the penalty grows with
        # log10 of the scaled difference raised to the fifth power.
        if delta_clashscore >= 1:
            model_report['quality_score'] = round(
                10 - math.log(delta_clashscore**5, 10), 2)
        else:
            model_report['quality_score'] = 10
        output.append('Final quality score: ' +
                      str(model_report['quality_score']))
    else:
        model_report['model_clashscore'] = 'NA'
        model_report['model_molprobity'] = 'NA'
        model_report['quality_score'] = 'NA'

    # Final score: mean of whichever partial scores were computed.
    if 'M' in g_args.assessment and interfaces_assessed:
        if conservation_assessed:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score'],
                    model_report['quality_score']
                ]) / 3, 2)
        else:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['quality_score']
                ]) / 2, 2)
    elif 'M' in g_args.assessment:
        model_report['protchoir_score'] = model_report['quality_score']
    elif interfaces_assessed:
        # Guarded by interfaces_assessed rather than "'I' in assessment":
        # with monomers allowed the interface scores are the string 'NA' and
        # averaging them raised a TypeError.
        if conservation_assessed:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score']
                ]) / 2, 2)
        else:
            model_report['protchoir_score'] = model_report['interfaces_score']
    else:
        model_report['protchoir_score'] = 'NA'

    if str(model_report['protchoir_score']) == 'NA':
        model_report['score_color'] = 'grey'
    elif model_report['protchoir_score'] <= 5:
        model_report['score_color'] = 'red'
    elif 5 < model_report['protchoir_score'] <= 7:
        model_report['score_color'] = 'orange'
    elif model_report['protchoir_score'] > 7:
        model_report['score_color'] = 'green'

    # Context manager so the pickle file is flushed and closed
    # deterministically.
    with open(model_oligomer_name + '_CHOIR_model_report.pickle',
              'wb') as report_handle:
        pickle.dump(model_report, report_handle)
    return model_report, '\n'.join(output)


def _percent_area_change(assembled_area, isolated_area):
    """Return the truncated percent change from isolated to assembled area."""
    return int(100 * (float(assembled_area) - float(isolated_area)) /
               float(isolated_area))


def _surface_component_score(area_reduction, exposed_area_reduction):
    """Score one surface class against total exposed-area loss, capped at 10."""
    if area_reduction < 0:
        score = 10 * (area_reduction / exposed_area_reduction) / 3
    else:
        score = 0
    if score > 10:
        score = 10
    return score


def _relative_bond_count(model_count, template_count, delta):
    """Return (relative %, clrs key, counted flag) for one bond-type comparison."""
    if model_count == template_count == 0:
        # Absent on both sides: reported in red and excluded from the average.
        return 0, 'r', False
    if delta >= 0:
        return 100, 'g', True
    return round(model_count * 100 / template_count, 2), 'r', True