def _map_maybe_parallel(worker, items, multiprocess):
    """Yield ``worker(item)`` for every item, optionally via a process pool.

    In serial mode results are produced lazily, one item at a time, so the
    caller's per-result handling stays interleaved with the work itself.
    In multiprocess mode all results are gathered first, and the pool is
    always closed and joined, even when a worker raises.
    """
    if multiprocess:
        pool = Pool()
        try:
            results = pool.map_async(worker, items).get()
        finally:
            pool.close()
            pool.join()
        yield from results
    else:
        for item in items:
            yield worker(item)


def make_oligomer(input_file, largest_oligo_complexes, report, args,
                  residue_index_mapping=None):
    """Select a template, build the alignment and generate oligomer models.

    The function runs three subsections:

    * 2[a] picks the template.  In structure mode every candidate is scored
      with ``analyse_largest_complexes`` and the highest average Q-score
      wins, provided it reaches ``args.qscore_cutoff``.  In sequence mode
      the candidates are tried in iteration order until a local template
      file can be produced for one of them.
    * 2[b] builds one alignment per renamed template chain, merges them
      with ``generate_ali`` and rejects the result when too many windows
      score below ``args.similarity_cutoff``.
    * 2[c] writes the modelling script, runs it and restores the original
      chain identifiers on every expected model.

    Args:
        input_file: Path to the input; a ``.pdb`` file or, in sequence
            mode, optionally a ``*_CHOIR_MonomerSequence.fasta`` file.
        largest_oligo_complexes: Mapping from candidate template key to
            the template chains to use.  Keys are split on ``':'`` to get
            the template code.
        report: Run report dictionary; ``report['hits'][key]`` must exist
            for every candidate.  It is updated in place.
        args: Parsed options.  Attributes read here: ``verbosity``,
            ``sequence_mode``, ``multiprocess``, ``qscore_cutoff``,
            ``generate_report``, ``similarity_cutoff``, ``bad_streches``.
        residue_index_mapping: Forwarded unchanged to ``generate_ali``.

    Returns:
        ``(best_oligo_template, built_oligomers, report)`` on success.
        On failure ``(None, None, report)`` with ``report['exit']`` set to
        ``'4'`` (no template reached the Q-score cut-off), ``'5'`` (no
        template could be prepared in sequence mode) or ``'6'``
        (alignment score unacceptable).

    Side effects:
        Rebinds the module globals ``workdir``, ``input_name``,
        ``verbosity``, ``g_input_file``, ``g_args``,
        ``best_oligo_template_code`` and ``renamed_chains_file``.
    """
    global workdir
    global input_name
    global verbosity
    global g_input_file
    global g_args
    global best_oligo_template_code
    global renamed_chains_file
    g_input_file = input_file
    g_args = args
    verbosity = args.verbosity
    workdir = os.getcwd()
    multiprocess = args.multiprocess is True

    # Subsection 2[a] #######################################################################
    if args.sequence_mode is False:
        input_name = os.path.basename(input_file).split(".pdb")[0].replace(
            '.', '_')
        candidate_qscores = {}
        # Select structurally best oligomeric template using GESAMT
        pctools.print_section(2, 'OLIGOMER ASSEMBLING')
        pctools.print_subsection('2[a]', 'Structural template selection')
        for hitchain, average_qscore, output in _map_maybe_parallel(
                analyse_largest_complexes,
                largest_oligo_complexes.items(), multiprocess):
            candidate_qscores[hitchain] = average_qscore
            report['hits'][hitchain]['qscore'] = round(average_qscore, 3)
            print(output)

        # With no candidates at all there is nothing to pick from; treat it
        # like "no template reached the cut-off" instead of letting max()
        # raise ValueError on an empty sequence.
        best_oligo_template = max(candidate_qscores,
                                  key=candidate_qscores.get,
                                  default=None)
        if (best_oligo_template is not None
                and candidate_qscores[best_oligo_template]
                >= args.qscore_cutoff):
            print('Structurally, the best template is: ' + clrs['y'] +
                  best_oligo_template + clrs['n'] + '. Using that!\n')
            best_hit = report['hits'][best_oligo_template]
            report['best_template'] = best_oligo_template.split(':')[0]
            report['best_id'] = best_hit['id']
            report['best_cov'] = best_hit['coverage']
            report['best_qscore'] = best_hit['qscore']
            report['best_nchains'] = best_hit['final_homo_chains']
        else:
            print('No template had an average Q-score above cut-off of ' +
                  clrs['c'] + str(args.qscore_cutoff) + clrs['n'] +
                  '\nTry lowering the cutoff or running in sequence mode.\n')
            report['exit'] = '4'
            return None, None, report

        report['topology_figure'] = './' + best_oligo_template.replace(
            ':', '_') + '_CHOIR_Topology.png'
        template_chains = largest_oligo_complexes[best_oligo_template]
        best_oligo_template_code = best_oligo_template.split(':')[0]
        clean_template_file = make_local_template(best_oligo_template_code)

    elif args.sequence_mode is True:
        if input_file.endswith('.pdb'):
            input_name = os.path.basename(input_file).split(
                ".pdb")[0].replace('.', '_')
            input_file = os.path.join(
                workdir, input_name + '_CHOIR_MonomerSequence.fasta')
            g_input_file = input_file
        elif input_file.endswith('_CHOIR_MonomerSequence.fasta'):
            input_name = os.path.basename(input_file).split(
                "_CHOIR_MonomerSequence.fasta")[0]

        pctools.print_section(2, 'OLIGOMER ASSEMBLING - SEQUENCE MODE')
        print(clrs['y'] +
              "Skipping section 2[a] - Structural template selection" +
              clrs['n'] + "\n")

        # Try the candidates in order until one can be turned into a local
        # template file.  The for/else "else" runs only when no candidate
        # succeeded (including when there are no candidates at all).
        candidates = list(largest_oligo_complexes)
        for attempt, best_oligo_template in enumerate(candidates, start=1):
            try:
                best_hit = report['hits'][best_oligo_template]
                report['best_template'] = best_oligo_template.split(':')[0]
                report['best_id'] = best_hit['id']
                report['best_cov'] = best_hit['coverage']
                report['best_qscore'] = 'NA'
                report['best_nchains'] = best_hit['final_homo_chains']
                report['topology_figure'] = (
                    './' + best_oligo_template.replace(':', '_') +
                    '_CHOIR_Topology.png')
                template_chains = largest_oligo_complexes[
                    best_oligo_template]
                best_oligo_template_code = best_oligo_template.split(':')[0]
                clean_template_file = make_local_template(
                    best_oligo_template_code)
                break
            except Exception:
                # Deliberately best-effort: any failure moves on to the next
                # candidate.  KeyboardInterrupt and SystemExit are no longer
                # swallowed, unlike with a bare "except:".
                if attempt < len(candidates):
                    print('Attempt ' + str(attempt) +
                          ' failed, trying a differente template candidate.')
        else:
            print('Failed to find templates in local databases.')
            report['exit'] = '5'
            return None, None, report

    relevant_chains_file = extract_relevant_chains(clean_template_file,
                                                   template_chains)
    if args.generate_report is True:
        report['template_figure'], pymol_output = pctools.pymol_screenshot(
            relevant_chains_file, args)
        print(pymol_output)
    renamed_chains_file, chains_dict = rename_relevant_chains(
        relevant_chains_file)
    relevant_chains = [
        chains_dict[template_chain] for template_chain in template_chains
    ]

    # Subsection 2[b] #######################################################################
    pctools.print_subsection('2[b]', 'Generating alignment')
    # Generate per chain alignment files
    alignment_files = []
    if args.sequence_mode is False:
        for _qscore, _rmsd, fasta_out, gesamt_output in _map_maybe_parallel(
                run_gesamt_parallel, chains_dict.values(), multiprocess):
            alignment_files.append(fasta_out)
            print(gesamt_output)
    elif args.sequence_mode is True:
        for fasta_out, output in _map_maybe_parallel(
                alignment_from_sequence, chains_dict.values(), multiprocess):
            alignment_files.append(fasta_out)
            print(output)
    print('Alignment files:\n' + clrs['g'] +
          ('\n').join([os.path.basename(i) for i in alignment_files]) +
          clrs['n'])

    # Generate final alignment which will be the input for Modeller
    final_alignment, full_residue_mapping = generate_ali(
        alignment_files, best_oligo_template_code, residue_index_mapping,
        args)

    # Score said alignment and enforce threshold
    (report['relative_alignment_score'], relative_wscores,
     nchains) = score_alignment(final_alignment)
    print('\nFinal average relative score for alignment: ' +
          str(round(report['relative_alignment_score'], 2)) + '%')
    bad_streches = sum(1 for wscore in relative_wscores
                       if wscore < args.similarity_cutoff)
    if bad_streches >= args.bad_streches * nchains:
        if args.sequence_mode is True:
            advice = 'Try running the default (structure) mode.\n'
        else:
            advice = ('Try increasing the number of candidate templates '
                      'or tweaking the similarity cut-offs.\n')
        print('\nThe alignment score was unacceptable for ' + clrs['r'] +
              str(bad_streches) + clrs['n'] +
              ' 30-res segments of the protein complex.\n' + advice)
        report['exit'] = '6'
        return None, None, report

    # Subsection 2[c] #######################################################################
    pctools.print_subsection('2[c]', 'Generating models')
    genmodel_file, expected_models = create_genmodel(
        final_alignment, best_oligo_template_code, relevant_chains, args)
    run_modeller(genmodel_file)

    # Record list of oligomers built
    built_oligomers = [
        restore_chain_identifiers(model, chains_dict, full_residue_mapping)
        for model in expected_models
    ]
    nmodels = len(built_oligomers)
    print(clrs['b'] + 'ProtCHOIR' + clrs['n'] + ' built ' + clrs['c'] +
          str(nmodels) + clrs['n'] + ' model oligomers:')
    for model in built_oligomers:
        print(clrs['g'] + model + clrs['n'])
    return best_oligo_template, built_oligomers, report
# (report key suffix, interface dictionary field) for every compared quantity.
# The order fixes the insertion order of the keys in each comparison dict.
_INTERFACE_FIELDS = (
    ('area', 'interface area'),
    ('energy', 'interface solvation energy'),
    ('hb', 'hydrogen bonds'),
    ('sb', 'salt bridges'),
    ('ss', 'disulphide bridges'),
)


def _capped_surface_score(area_reduction, exposed_area_reduction):
    """Return the 0-10 surface score for one area class, capped at 10."""
    if area_reduction < 0:
        score = 10 * (area_reduction / exposed_area_reduction) / 3
    else:
        score = 0
    return min(score, 10)


def _relative_energy(model_energy, template_energy, delta_energy):
    """Return ``(relative %, colour)`` for the solvation-energy comparison.

    Any combination not explicitly handled (notably either energy being
    exactly zero while the model is worse) scores 0 rather than leaving the
    value undefined.
    """
    if delta_energy <= 0:
        return 100, clrs['g']
    if model_energy < 0 and template_energy < 0:
        return round(model_energy * 100 / template_energy, 2), clrs['r']
    if model_energy < 0 and template_energy > 0:
        return 100, clrs['r']
    return 0, clrs['r']


def _relative_count(model_count, template_count, delta):
    """Return ``(relative %, colour, both_zero)`` for a bond-count comparison.

    ``both_zero`` is True when neither interface has any such bond, in which
    case the caller drops this criterion from the score denominator.
    """
    if model_count == template_count == 0:
        return 0, clrs['r'], True
    if delta >= 0:
        return 100, clrs['g'], False
    return round(model_count * 100 / template_count, 2), clrs['r'], False


def analyse_model(oligomer):
    """Assess one built oligomer model and return its report and log text.

    Depending on the letters present in ``g_args.assessment`` the model is
    compared against the template by interfaces ('I', skipped when
    ``g_args.allow_monomers`` is set), by GESAMT superposition ('G') and by
    Molprobity ('M').  The partial scores are combined into
    ``model_report['protchoir_score']`` and a colour label.

    The function reads module-level state set elsewhere in the file:
    ``g_report``, ``g_args``, ``g_entropies``, ``g_z_entropies``,
    ``g_tmdata``, ``g_minx``, ``g_maxx``, ``g_interfaces_dict``,
    ``g_template_hitchain``, ``template``, ``template_file`` and
    ``template_molprobity``.

    Args:
        oligomer: Path of the model file to analyse.  Its basename up to
            ``"_CHOIR_"`` (dots replaced by underscores) names the model.

    Returns:
        ``(model_report, text)`` where ``model_report`` is a copy of
        ``g_report`` extended with the model's results and ``text`` is the
        collected log output joined with newlines.

    Side effects:
        Writes ``<model name>_CHOIR_model_report.pickle`` (relative path)
        containing ``model_report``.
    """
    output = []
    model_report = g_report.copy()
    model_report['model_filename'] = oligomer
    model_oligomer_name = os.path.basename(oligomer).split(
        "_CHOIR_")[0].replace('.', '_')
    output.append(pctools.subsection('3', model_oligomer_name))
    output.append('Analysing oligomer file: ' + clrs['y'] + oligomer +
                  clrs['n'] + '\n')
    model_report['model_oligomer_name'] = model_oligomer_name
    if g_args.generate_report is True:
        model_report['model_figures'], pymol_output = pctools.pymol_screenshot(
            oligomer, g_args, putty=True)
        output.append(pymol_output)

    pdb_name, structure, nchains = pctools.parse_any_structure(oligomer)
    nchains, seqs, chain_ids = pctools.extract_seqs(structure, 0)
    relevant_chains = [seq[0] for seq in seqs]

    pisa_output, pisa_error, protomer_data = pctools.run_pisa(
        oligomer, '', g_args.verbosity,
        gen_monomer_data=True, gen_oligomer_data=True)
    protomer_surface_residues = pctools.get_areas(protomer_data)
    (model_report['assemblied_protomer_plot'],
     model_report['assemblied_protomer_exposed_area'],
     model_report['assemblied_protomer_hydrophobic_area'],
     model_report['assemblied_protomer_conserved_area'],
     minx, maxx, analysis_output) = pctools.plot_analysis(
         pdb_name, protomer_surface_residues, g_entropies, g_z_entropies,
         g_tmdata, g_args, minx=g_minx, maxx=g_maxx)
    output.append(analysis_output)

    interfaces_assessed = ('I' in g_args.assessment
                           and not g_args.allow_monomers)
    quality_assessed = 'M' in g_args.assessment
    surface_assessed = (g_args.sequence_mode is False
                        and g_args.skip_conservation is False)

    if interfaces_assessed:
        output.append(
            pctools.subsection('3' + '[I]',
                               'Interfaces Comparison: ' + model_oligomer_name))
        if surface_assessed:
            # Percentage change of each protomer surface class on assembly.
            for area in ('exposed', 'hydrophobic', 'conserved'):
                isolated = float(model_report['protomer_' + area + '_area'])
                assembled = float(
                    model_report['assemblied_protomer_' + area + '_area'])
                model_report[area + '_area_reduction'] = int(
                    100 * (assembled - isolated) / isolated)

            exposed_reduction = model_report['exposed_area_reduction']
            if exposed_reduction < -5:
                hydrophobic_surface_score = _capped_surface_score(
                    model_report['hydrophobic_area_reduction'],
                    exposed_reduction)
                output.append('Hydrophobic surface score: ' +
                              str(hydrophobic_surface_score))
                conserved_surface_score = _capped_surface_score(
                    model_report['conserved_area_reduction'],
                    exposed_reduction)
                output.append('Conserved surface score: ' +
                              str(conserved_surface_score))
                model_report['surface_score'] = round(
                    (hydrophobic_surface_score + conserved_surface_score) / 2,
                    2)
            else:
                output.append(clrs['r'] +
                              'Exposed area reduction too small.' + clrs['n'])
                model_report['surface_score'] = 0
            output.append('Final surface score: ' +
                          str(model_report['surface_score']))
        else:
            model_report['surface_score'] = 'NA'

        model_oligomer = oligomer.split('_CHOIR_CorrectedChains')[0]
        xml_out = model_oligomer + '_CHOIR_PisaInterfaces.xml'
        model_interfaces_list, interfaces_output = pctools.parse_interfaces(
            xml_out, relevant_chains, g_args.verbosity)
        template_interfaces_list = g_interfaces_dict[g_template_hitchain]

        if model_interfaces_list and template_interfaces_list:
            if g_args.verbosity > 0:
                output.append(clrs['y'] + 'MODEL INTERFACES' + clrs['n'])
                for model_interface in model_interfaces_list:
                    output.append(clrs['y'] +
                                  ' <> '.join(model_interface['chains']) +
                                  clrs['n'])
                    output.append(clrs['y'] + 'Interface Area: ' + clrs['n'] +
                                  str(model_interface['interface area']) +
                                  ' A^2')
                    output.append(
                        clrs['y'] + 'Interface Solvation Energy: ' +
                        clrs['n'] +
                        str(model_interface['interface solvation energy']) +
                        ' kcal/mol')
                    output.append(clrs['y'] + 'Hydrogen Bonds: ' + clrs['n'] +
                                  str(model_interface['hydrogen bonds']))
                    output.append(clrs['y'] + 'Salt Bridges: ' + clrs['n'] +
                                  str(model_interface['salt bridges']))
                    output.append(clrs['y'] + 'Disulphide Bridges: ' +
                                  clrs['n'] +
                                  str(model_interface['disulphide bridges']) +
                                  "\n\n")

            interfaces_comparison = {}
            for template_interface in template_interfaces_list:
                for model_interface in model_interfaces_list:
                    if set(model_interface['chains']) != set(
                            template_interface['chains']):
                        continue
                    comparison_data = {}
                    # Sum of the criterion weights used in the score below:
                    # area 1, energy 2, H-bonds 2, salt bridges 3,
                    # disulphides 4.
                    denominator = 12
                    for short, field in _INTERFACE_FIELDS:
                        comparison_data['model ' + short] = (
                            model_interface[field])
                        comparison_data['template ' + short] = (
                            template_interface[field])
                        comparison_data['delta ' + short] = round(
                            model_interface[field] -
                            template_interface[field], 2)
                    delta_area = comparison_data['delta area']
                    delta_energy = comparison_data['delta energy']
                    delta_hb = comparison_data['delta hb']
                    delta_sb = comparison_data['delta sb']
                    delta_ss = comparison_data['delta ss']

                    output.append(clrs['y'] + 'INTERFACES COMPARISON' +
                                  clrs['n'])
                    output.append(' <> '.join(model_interface['chains']))

                    if delta_area >= 0:
                        emphasis_color = clrs['g']
                        relative_area = 100
                    else:
                        emphasis_color = clrs['r']
                        relative_area = round(
                            model_interface['interface area'] * 100 /
                            template_interface['interface area'], 2)
                    output.append('Delta Interface Area: ' + emphasis_color +
                                  str(delta_area) + clrs['n'] + ' A^2 (' +
                                  str(relative_area) + '%)')

                    relative_energy, emphasis_color = _relative_energy(
                        model_interface['interface solvation energy'],
                        template_interface['interface solvation energy'],
                        delta_energy)
                    output.append('Delta Interface Solvation Energy: ' +
                                  emphasis_color + str(delta_energy) +
                                  clrs['n'] + ' kcal/mol (' +
                                  str(relative_energy) + '%)')

                    relative_hb, emphasis_color, absent = _relative_count(
                        model_interface['hydrogen bonds'],
                        template_interface['hydrogen bonds'], delta_hb)
                    if absent:
                        denominator -= 2
                    output.append('Delta Hydrogen Bonds: ' + emphasis_color +
                                  str(delta_hb) + clrs['n'] + ' (' +
                                  str(relative_hb) + '%)')

                    relative_sb, emphasis_color, absent = _relative_count(
                        model_interface['salt bridges'],
                        template_interface['salt bridges'], delta_sb)
                    if absent:
                        denominator -= 3
                    output.append('Delta Salt Bridges: ' + emphasis_color +
                                  str(delta_sb) + clrs['n'] + ' (' +
                                  str(relative_sb) + '%)')

                    relative_ss, emphasis_color, absent = _relative_count(
                        model_interface['disulphide bridges'],
                        template_interface['disulphide bridges'], delta_ss)
                    if absent:
                        denominator -= 4
                    output.append('Delta Disulphide Bridges: ' +
                                  emphasis_color + str(delta_ss) + clrs['n'] +
                                  ' (' + str(relative_ss) + '%)\n')

                    if denominator == 0:
                        comparison_data['score'] = 0
                    else:
                        comparison_data['score'] = round(
                            (relative_area + 2 * relative_energy +
                             2 * relative_hb + 3 * relative_sb +
                             4 * relative_ss) / denominator, 2)
                    output.append('Interface score: ' +
                                  str(comparison_data['score']))
                    interfaces_comparison[''.join(
                        sorted(model_interface['chains']))] = comparison_data

            if interfaces_comparison:
                comparison_plots, interfaces_output = plot_deltas(
                    model_oligomer_name, template, interfaces_comparison,
                    g_args)
                model_report['comparison_plots'] = os.path.basename(
                    comparison_plots)
                output.append(interfaces_output)
                summed_score = sum(
                    data['score'] for data in interfaces_comparison.values())
                model_report['interfaces_score'] = round(
                    summed_score / (10 * len(interfaces_comparison)), 2)
            else:
                # No model interface shares its chain pair with a template
                # interface: nothing to plot or average, so score it 0
                # instead of dividing by zero.
                model_report['comparison_plots'] = 'NA'
                model_report['interfaces_score'] = 0
            output.append('Final interfaces score: ' +
                          str(model_report['interfaces_score']))
        else:
            if 'surface_score' not in model_report:
                model_report['surface_score'] = 0
            model_report['interfaces_score'] = 0
    else:
        model_report['surface_score'] = 'NA'
        model_report['interfaces_score'] = 'NA'
        model_report['comparison_plots'] = 'NA'
        model_report['assemblied_protomer_exposed_area'] = 'NA'
        model_report['assemblied_protomer_hydrophobic_area'] = 'NA'
        model_report['assemblied_protomer_conserved_area'] = 'NA'

    if 'G' in g_args.assessment:
        output.append(pctools.subsection('3' + '[G]', 'GESAMT Comparison'))
        qscore, rmsd, fasta_out, gesamt_output = pctools.run_gesamt(
            template, template_file, model_oligomer_name, oligomer, None,
            g_args)
        output.append(gesamt_output)
        model_report['gesamt_qscore'] = str(qscore)
        model_report['gesamt_rmsd'] = str(rmsd)
    else:
        model_report['gesamt_qscore'] = 'NA'
        model_report['gesamt_rmsd'] = 'NA'

    if quality_assessed:
        output.append(
            pctools.subsection('3' + '[M]', 'Molprobity Comparison'))
        model_molprobity, molprobity_output = pctools.run_molprobity(
            oligomer, g_args)
        output.append(molprobity_output)
        model_report['model_clashscore'] = str(model_molprobity['clashscore'])
        model_report['model_molprobity'] = str(
            model_molprobity['molprobity_score'])
        output.append(clrs['y'] + 'MOLPROBITY COMPARISON' + clrs['n'])
        output.append('Criterion\tTempl.\tModel')
        for label, key in (('Rama. Fav.\t', 'rama_fav'),
                           ('Rama. Out.\t', 'rama_out'),
                           ('Rot. Out.\t', 'rot_out'),
                           ('CBeta Dev.\t', 'cb_dev'),
                           ('Clashscore\t', 'clashscore'),
                           ('Molprob. Score\t', 'molprobity_score')):
            output.append(label + str(template_molprobity[key]) + '\t' +
                          str(model_molprobity[key]))
        molprobity_radar, radar_output = plot_molprobity(
            model_oligomer_name, model_molprobity, template,
            template_molprobity)
        output.append(radar_output)
        model_report['molprobity_radar'] = molprobity_radar
        delta_clashscore = (model_molprobity['clashscore'] -
                            template_molprobity['clashscore']) / 10
        output.append('Delta clashscore: ' + str(delta_clashscore))
        if delta_clashscore >= 1:
            model_report['quality_score'] = round(
                10 - math.log(delta_clashscore**5, 10), 2)
        else:
            model_report['quality_score'] = 10
        output.append('Final quality score: ' +
                      str(model_report['quality_score']))
    else:
        model_report['model_clashscore'] = 'NA'
        model_report['model_molprobity'] = 'NA'
        model_report['quality_score'] = 'NA'

    # Combine the numeric partial scores.  The interface scores only count
    # when the interface assessment actually ran; with allow_monomers set
    # they are 'NA' and must not be averaged.
    if quality_assessed and interfaces_assessed:
        if surface_assessed:
            partial_scores = [model_report['interfaces_score'],
                              model_report['surface_score'],
                              model_report['quality_score']]
        else:
            partial_scores = [model_report['interfaces_score'],
                              model_report['quality_score']]
        model_report['protchoir_score'] = round(
            sum(partial_scores) / len(partial_scores), 2)
    elif quality_assessed:
        model_report['protchoir_score'] = model_report['quality_score']
    elif interfaces_assessed:
        if surface_assessed:
            model_report['protchoir_score'] = round(
                sum([model_report['interfaces_score'],
                     model_report['surface_score']]) / 2, 2)
        else:
            model_report['protchoir_score'] = model_report['interfaces_score']
    else:
        model_report['protchoir_score'] = 'NA'

    if str(model_report['protchoir_score']) == 'NA':
        model_report['score_color'] = 'grey'
    elif model_report['protchoir_score'] <= 5:
        model_report['score_color'] = 'red'
    elif 5 < model_report['protchoir_score'] <= 7:
        model_report['score_color'] = 'orange'
    elif model_report['protchoir_score'] > 7:
        model_report['score_color'] = 'green'

    # Use a context manager so the pickle file is flushed and closed.
    with open(model_oligomer_name + '_CHOIR_model_report.pickle',
              'wb') as report_file:
        pickle.dump(model_report, report_file)
    return model_report, '\n'.join(output)