Esempio n. 1
0
def _map_in_pool(func, items):
    """Run ``func`` over ``items`` in a worker pool and return the result list.

    The pool is always closed and joined, even when a worker raises, so no
    worker processes are left behind.
    """
    pool = Pool()
    try:
        return pool.map_async(func, items).get()
    finally:
        pool.close()
        pool.join()


def _record_best_template(report, best_oligo_template, qscore):
    """Copy the selected hit's data into the top-level ``best_*`` report keys."""
    report['best_template'] = best_oligo_template.split(':')[0]
    hit = report['hits'][best_oligo_template]
    report['best_id'] = hit['id']
    report['best_cov'] = hit['coverage']
    report['best_qscore'] = qscore
    report['best_nchains'] = hit['final_homo_chains']
    report['topology_figure'] = './' + best_oligo_template.replace(
        ':', '_') + '_CHOIR_Topology.png'


def make_oligomer(input_file,
                  largest_oligo_complexes,
                  report,
                  args,
                  residue_index_mapping=None):
    """Select an oligomeric template, align against it and build models.

    The function runs three stages: template selection (2[a]), alignment
    generation and scoring (2[b]) and model building (2[c]). It also
    publishes several module-level globals (``workdir``, ``input_name``,
    ``verbosity``, ``g_input_file``, ``g_args``, ``best_oligo_template_code``
    and ``renamed_chains_file``).
    NOTE(review): presumably the worker functions called here read those
    globals - confirm against their definitions.

    Args:
        input_file: Path of the input; a ``.pdb`` file, or in sequence mode
            optionally a ``*_CHOIR_MonomerSequence.fasta`` file.
        largest_oligo_complexes: Mapping whose keys are hit identifiers of
            the form ``<template code>:<...>`` and whose values are the
            template chains to use for that hit.
        report: Report dictionary; must hold a ``'hits'`` entry keyed by the
            same hit identifiers. It is updated in place.
        args: Options object. The attributes read here are ``verbosity``,
            ``sequence_mode``, ``multiprocess``, ``qscore_cutoff``,
            ``generate_report``, ``similarity_cutoff`` and ``bad_streches``;
            it is also forwarded to helper functions.
        residue_index_mapping: Forwarded unchanged to ``generate_ali``.

    Returns:
        ``(best_oligo_template, built_oligomers, report)`` on success, or
        ``(None, None, report)`` with ``report['exit']`` set to ``'4'`` (no
        template reached the Q-score cut-off), ``'5'`` (no template could be
        prepared in sequence mode) or ``'6'`` (alignment score rejected).
    """
    global workdir
    global input_name
    global verbosity
    global g_input_file
    global g_args
    global best_oligo_template_code
    global renamed_chains_file
    g_input_file = input_file
    g_args = args
    verbosity = args.verbosity
    workdir = os.getcwd()

    # Subsection 2[a] #######################################################################
    if args.sequence_mode is False:
        input_name = os.path.basename(input_file).split(".pdb")[0].replace(
            '.', '_')
        # Select structurally best oligomeric template using GESAMT
        pctools.print_section(2, 'OLIGOMER ASSEMBLING')
        pctools.print_subsection('2[a]', 'Structural template selection')
        if args.multiprocess is True:
            analyses = _map_in_pool(analyse_largest_complexes,
                                    largest_oligo_complexes.items())
        else:
            # Lazy, so each candidate's output is printed right after it runs.
            analyses = (analyse_largest_complexes(item)
                        for item in largest_oligo_complexes.items())
        candidate_qscores = {}
        for hitchain, average_qscore, output in analyses:
            candidate_qscores[hitchain] = average_qscore
            report['hits'][hitchain]['qscore'] = round(average_qscore, 3)
            print(output)

        # ``default=None`` keeps an empty candidate set from raising
        # ValueError; it is then reported like "nothing above the cut-off".
        best_oligo_template = max(candidate_qscores,
                                  key=candidate_qscores.get,
                                  default=None)
        if (best_oligo_template is None or
                candidate_qscores[best_oligo_template] < args.qscore_cutoff):
            print('No template had an average Q-score above cut-off of ' +
                  clrs['c'] + str(args.qscore_cutoff) + clrs['n'] +
                  '\nTry lowering the cutoff or running in sequence mode.\n')
            report['exit'] = '4'
            return None, None, report
        print('Structurally, the best template is: ' + clrs['y'] +
              best_oligo_template + clrs['n'] + '. Using that!\n')
        _record_best_template(
            report, best_oligo_template,
            report['hits'][best_oligo_template]['qscore'])
        template_chains = largest_oligo_complexes[best_oligo_template]
        best_oligo_template_code = best_oligo_template.split(':')[0]
        clean_template_file = make_local_template(best_oligo_template_code)

    elif args.sequence_mode is True:
        if input_file.endswith('.pdb'):
            input_name = os.path.basename(input_file).split(".pdb")[0].replace(
                '.', '_')
            input_file = os.path.join(
                workdir, input_name + '_CHOIR_MonomerSequence.fasta')
            g_input_file = input_file

        elif input_file.endswith('_CHOIR_MonomerSequence.fasta'):
            input_name = os.path.basename(input_file).split(
                "_CHOIR_MonomerSequence.fasta")[0]
        # NOTE(review): any other file name leaves the global ``input_name``
        # at whatever value it had before this call.

        pctools.print_section(2, 'OLIGOMER ASSEMBLING - SEQUENCE MODE')
        print(clrs['y'] +
              "Skipping section 2[a] - Structural template selection" +
              clrs['n'] + "\n")
        # Try the candidates in mapping order until one can be prepared.
        candidates = list(largest_oligo_complexes)
        for attempt, best_oligo_template in enumerate(candidates, start=1):
            try:
                _record_best_template(report, best_oligo_template, 'NA')
                template_chains = largest_oligo_complexes[best_oligo_template]
                best_oligo_template_code = best_oligo_template.split(':')[0]
                clean_template_file = make_local_template(
                    best_oligo_template_code)
                break
            except Exception:
                # Deliberately broad: any failure just moves on to the next
                # candidate. KeyboardInterrupt/SystemExit still propagate.
                if attempt < len(candidates):
                    print('Attempt ' + str(attempt) +
                          ' failed, trying a differente template candidate.')
        else:
            # Reached when every candidate failed or there were none at all.
            print('Failed to find templates in local databases.')
            report['exit'] = '5'
            return None, None, report

    relevant_chains_file = extract_relevant_chains(clean_template_file,
                                                   template_chains)
    if args.generate_report is True:
        report['template_figure'], pymol_output = pctools.pymol_screenshot(
            relevant_chains_file, args)
        print(pymol_output)
    renamed_chains_file, chains_dict = rename_relevant_chains(
        relevant_chains_file)
    relevant_chains = [
        chains_dict[template_chain] for template_chain in template_chains
    ]

    # Subsection 2[b] #######################################################################
    pctools.print_subsection('2[b]', 'Generating alignment')
    # Generate per chain alignment files
    alignment_files = []
    if args.sequence_mode is False:
        if args.multiprocess is True:
            gesamt_runs = _map_in_pool(run_gesamt_parallel,
                                       chains_dict.values())
        else:
            gesamt_runs = (run_gesamt_parallel(chain)
                           for chain in chains_dict.values())
        for _qscore, _rmsd, fasta_out, gesamt_output in gesamt_runs:
            alignment_files.append(fasta_out)
            print(gesamt_output)

    elif args.sequence_mode is True:
        if args.multiprocess is True:
            sequence_runs = _map_in_pool(alignment_from_sequence,
                                         chains_dict.values())
        else:
            sequence_runs = (alignment_from_sequence(chain)
                             for chain in chains_dict.values())
        for fasta_out, output in sequence_runs:
            alignment_files.append(fasta_out)
            print(output)
    print('Alignment files:\n' + clrs['g'] +
          ('\n').join([os.path.basename(i)
                       for i in alignment_files]) + clrs['n'])

    # Generate final alignment which will be the input for Modeller
    final_alignment, full_residue_mapping = generate_ali(
        alignment_files, best_oligo_template_code, residue_index_mapping, args)
    # Score said alignment and enforce threshold
    relative_score, relative_wscores, nchains = score_alignment(
        final_alignment)
    report['relative_alignment_score'] = relative_score
    print('\nFinal average relative score for alignment: ' +
          str(round(report['relative_alignment_score'], 2)) + '%')
    bad_streches = sum(1 for wscore in relative_wscores
                       if wscore < args.similarity_cutoff)
    # The tolerated number of low-scoring windows scales with chain count.
    if bad_streches >= args.bad_streches * nchains:
        if args.sequence_mode is True:
            print(
                '\nThe alignment score was unacceptable for ' + clrs['r'] +
                str(bad_streches) + clrs['n'] +
                ' 30-res segments of the protein complex.\nTry running the default (structure) mode.\n'
            )
        else:
            print(
                '\nThe alignment score was unacceptable for ' + clrs['r'] +
                str(bad_streches) + clrs['n'] +
                ' 30-res segments of the protein complex.\nTry increasing the number of candidate templates or tweaking the similarity cut-offs.\n'
            )
        report['exit'] = '6'
        return None, None, report

    # Subsection 2[c] #######################################################################
    pctools.print_subsection('2[c]', 'Generating models')
    genmodel_file, expected_models = create_genmodel(final_alignment,
                                                     best_oligo_template_code,
                                                     relevant_chains, args)
    run_modeller(genmodel_file)

    # Record list of oligomers built
    built_oligomers = [
        restore_chain_identifiers(model, chains_dict, full_residue_mapping)
        for model in expected_models
    ]
    nmodels = len(built_oligomers)
    print(clrs['b'] + 'ProtCHOIR' + clrs['n'] + ' built ' + clrs['c'] +
          str(nmodels) + clrs['n'] + ' model oligomers:')
    for model in built_oligomers:
        print(clrs['g'] + model + clrs['n'])

    return best_oligo_template, built_oligomers, report
Esempio n. 2
0
def _capped_surface_subscore(area_reduction, exposed_reduction):
    """Return a 0-10 sub-score from one area reduction relative to the exposed one."""
    if area_reduction < 0:
        score = 10 * (area_reduction / exposed_reduction) / 3
    else:
        score = 0
    if score > 10:
        score = 10
    return score


def _score_interface_pair(model_interface, template_interface):
    """Compare one model interface with the matching template interface.

    Returns ``(comparison_data, lines)``: the dictionary of model / template /
    delta values plus the final ``'score'``, and the text lines describing
    the comparison.
    """
    comparison_data = {}
    for key, tag in (('interface area', 'area'),
                     ('interface solvation energy', 'energy'),
                     ('hydrogen bonds', 'hb'),
                     ('salt bridges', 'sb'),
                     ('disulphide bridges', 'ss')):
        comparison_data['model ' + tag] = model_interface[key]
        comparison_data['template ' + tag] = template_interface[key]
        comparison_data['delta ' + tag] = round(
            model_interface[key] - template_interface[key], 2)

    lines = [
        clrs['y'] + 'INTERFACES COMPARISON' + clrs['n'],
        ' <> '.join(model_interface['chains'])
    ]

    # Interface area: at least as large as the template counts as 100%.
    delta_area = comparison_data['delta area']
    if delta_area >= 0:
        emphasis_color = clrs['g']
        relative_area = 100
    else:
        emphasis_color = clrs['r']
        relative_area = round(
            model_interface['interface area'] * 100 /
            template_interface['interface area'], 2)
    lines.append('Delta Interface Area: ' + emphasis_color +
                 str(delta_area) + clrs['n'] + ' A^2 (' +
                 str(relative_area) + '%)')

    # Solvation energy: at most the template's value counts as 100%.
    delta_energy = comparison_data['delta energy']
    model_energy = model_interface['interface solvation energy']
    template_energy = template_interface['interface solvation energy']
    if delta_energy <= 0:
        emphasis_color = clrs['g']
        relative_energy = 100
    else:
        emphasis_color = clrs['r']
        if model_energy < 0 and template_energy < 0:
            relative_energy = round(model_energy * 100 / template_energy, 2)
        else:
            # The model energy is higher than the template's and at least
            # one of the two is not negative (including exactly zero, which
            # previously left this value unassigned): no credit.
            relative_energy = 0
    lines.append('Delta Interface Solvation Energy: ' + emphasis_color +
                 str(delta_energy) + clrs['n'] + ' kcal/mol (' +
                 str(relative_energy) + '%)')

    # Area has weight 1 and energy weight 2; the bond terms below add
    # weights 2, 3 and 4, for a total of 12.
    denominator = 12
    weighted_sum = relative_area + 2 * relative_energy
    for key, tag, label, weight, closing in (
            ('hydrogen bonds', 'hb', 'Delta Hydrogen Bonds: ', 2, '%)'),
            ('salt bridges', 'sb', 'Delta Salt Bridges: ', 3, '%)'),
            ('disulphide bridges', 'ss', 'Delta Disulphide Bridges: ', 4,
             '%)\n')):
        delta = comparison_data['delta ' + tag]
        if model_interface[key] == template_interface[key] == 0:
            # Absent on both sides: drop this term from the average.
            relative = 0
            emphasis_color = clrs['r']
            denominator -= weight
        elif delta >= 0:
            relative = 100
            emphasis_color = clrs['g']
        else:
            relative = round(
                model_interface[key] * 100 / template_interface[key], 2)
            emphasis_color = clrs['r']
        lines.append(label + emphasis_color + str(delta) + clrs['n'] +
                     ' (' + str(relative) + closing)
        weighted_sum += weight * relative

    # The denominator is never below 3 (area and energy always count).
    comparison_data['score'] = round(weighted_sum / denominator, 2)
    lines.append('Interface score: ' + str(comparison_data['score']))
    return comparison_data, lines


def analyse_model(oligomer):
    """Assess one model oligomer and return its report and textual output.

    Depending on the letters present in ``g_args.assessment`` the model is
    compared with the template through interfaces ('I'), GESAMT ('G') and
    Molprobity ('M'). The function reads module-level state prepared
    elsewhere (``g_report``, ``g_args``, ``g_entropies``, ``g_z_entropies``,
    ``g_tmdata``, ``g_minx``, ``g_maxx``, ``g_interfaces_dict``,
    ``g_template_hitchain``, ``template``, ``template_file`` and
    ``template_molprobity``).

    The report is also pickled to
    ``<model_oligomer_name>_CHOIR_model_report.pickle`` (relative path).

    Args:
        oligomer: Path of the model oligomer structure file.

    Returns:
        ``(model_report, text)`` where ``model_report`` is a shallow copy of
        ``g_report`` extended with this model's results and ``text`` is the
        collected output joined with newlines.
    """
    output = []
    model_report = g_report.copy()
    model_report['model_filename'] = oligomer
    model_oligomer_name = os.path.basename(oligomer).split(
        "_CHOIR_")[0].replace('.', '_')
    output.append(pctools.subsection('3', model_oligomer_name))
    output.append('Analysing oligomer file: ' + clrs['y'] + oligomer +
                  clrs['n'] + '\n')
    model_report['model_oligomer_name'] = model_oligomer_name
    if g_args.generate_report is True:
        model_report['model_figures'], pymol_output = pctools.pymol_screenshot(
            oligomer, g_args, putty=True)
        output.append(pymol_output)
    pdb_name, structure, nchains = pctools.parse_any_structure(oligomer)
    nchains, seqs, chain_ids = pctools.extract_seqs(structure, 0)
    relevant_chains = [seq[0] for seq in seqs]

    pisa_output, pisa_error, protomer_data = pctools.run_pisa(
        oligomer,
        '',
        g_args.verbosity,
        gen_monomer_data=True,
        gen_oligomer_data=True)
    protomer_surface_residues = pctools.get_areas(protomer_data)
    (protomer_plot, exposed_area, hydrophobic_area, conserved_area, _minx,
     _maxx, analysis_output) = pctools.plot_analysis(
         pdb_name,
         protomer_surface_residues,
         g_entropies,
         g_z_entropies,
         g_tmdata,
         g_args,
         minx=g_minx,
         maxx=g_maxx)
    model_report['assemblied_protomer_plot'] = protomer_plot
    model_report['assemblied_protomer_exposed_area'] = exposed_area
    model_report['assemblied_protomer_hydrophobic_area'] = hydrophobic_area
    model_report['assemblied_protomer_conserved_area'] = conserved_area
    output.append(analysis_output)

    assess_interfaces = ('I' in g_args.assessment and
                         not g_args.allow_monomers)
    assess_quality = 'M' in g_args.assessment
    use_surface = (g_args.sequence_mode is False and
                   g_args.skip_conservation is False)

    if assess_interfaces:
        output.append(
            pctools.subsection('3' + '[I]', 'Interfaces Comparison: ' +
                               model_oligomer_name))
        if use_surface:
            # Percentage change of each area between the isolated protomer
            # (values already in the report) and the assembled one.
            for tag in ('exposed', 'hydrophobic', 'conserved'):
                assembled = float(
                    model_report['assemblied_protomer_' + tag + '_area'])
                isolated = float(model_report['protomer_' + tag + '_area'])
                model_report[tag + '_area_reduction'] = int(
                    100 * (assembled - isolated) / isolated)

            exposed_reduction = model_report['exposed_area_reduction']
            if exposed_reduction < -5:
                hydrophobic_surface_score = _capped_surface_subscore(
                    model_report['hydrophobic_area_reduction'],
                    exposed_reduction)
                output.append('Hydrophobic surface score: ' +
                              str(hydrophobic_surface_score))
                conserved_surface_score = _capped_surface_subscore(
                    model_report['conserved_area_reduction'],
                    exposed_reduction)
                output.append('Conserved surface score: ' +
                              str(conserved_surface_score))
                model_report['surface_score'] = round(
                    (hydrophobic_surface_score + conserved_surface_score) / 2,
                    2)
            else:
                output.append(clrs['r'] + 'Exposed area reduction too small.' +
                              clrs['n'])
                model_report['surface_score'] = 0
            output.append('Final surface score: ' +
                          str(model_report['surface_score']))
        else:
            model_report['surface_score'] = 'NA'

        model_oligomer = oligomer.split('_CHOIR_CorrectedChains')[0]
        xml_out = model_oligomer + '_CHOIR_PisaInterfaces.xml'
        # NOTE(review): the text returned by parse_interfaces is not added to
        # ``output`` (the original code discarded it as well).
        model_interfaces_list, _parse_output = pctools.parse_interfaces(
            xml_out, relevant_chains, g_args.verbosity)
        template_interfaces_list = g_interfaces_dict[g_template_hitchain]

        if model_interfaces_list and template_interfaces_list:
            if g_args.verbosity > 0:
                output.append(clrs['y'] + 'MODEL INTERFACES' + clrs['n'])
                for model_interface in model_interfaces_list:
                    output.append(clrs['y'] +
                                  ' <> '.join(model_interface['chains']) +
                                  clrs['n'])
                    output.append(clrs['y'] + 'Interface Area: ' + clrs['n'] +
                                  str(model_interface['interface area']) +
                                  ' A^2')
                    output.append(
                        clrs['y'] + 'Interface Solvation Energy: ' +
                        clrs['n'] +
                        str(model_interface['interface solvation energy']) +
                        ' kcal/mol')
                    output.append(clrs['y'] + 'Hydrogen Bonds: ' + clrs['n'] +
                                  str(model_interface['hydrogen bonds']))
                    output.append(clrs['y'] + 'Salt Bridges: ' + clrs['n'] +
                                  str(model_interface['salt bridges']))
                    output.append(clrs['y'] + 'Disulphide Bridges: ' +
                                  clrs['n'] +
                                  str(model_interface['disulphide bridges']) +
                                  "\n\n")

            # Pair up interfaces formed by the same set of chains.
            interfaces_comparison = {}
            for template_interface in template_interfaces_list:
                for model_interface in model_interfaces_list:
                    if set(model_interface['chains']) != set(
                            template_interface['chains']):
                        continue
                    comparison_data, comparison_lines = _score_interface_pair(
                        model_interface, template_interface)
                    output.extend(comparison_lines)
                    interfaces_comparison[''.join(
                        sorted(model_interface['chains']))] = comparison_data

            if interfaces_comparison:
                comparison_plots, interfaces_output = plot_deltas(
                    model_oligomer_name, template, interfaces_comparison,
                    g_args)
                model_report['comparison_plots'] = os.path.basename(
                    comparison_plots)
                output.append(interfaces_output)
                summed_score = sum(
                    data['score'] for data in interfaces_comparison.values())
                model_report['interfaces_score'] = round(
                    summed_score / (10 * len(interfaces_comparison)), 2)
                output.append('Final interfaces score: ' +
                              str(model_report['interfaces_score']))
            else:
                # No model interface shares its chain set with a template
                # interface: nothing to average (this used to divide by
                # zero), so score it like a model without interfaces.
                model_report['interfaces_score'] = 0
        else:
            if 'surface_score' not in model_report:
                model_report['surface_score'] = 0
            model_report['interfaces_score'] = 0

    else:
        model_report['surface_score'] = 'NA'
        model_report['interfaces_score'] = 'NA'
        model_report['comparison_plots'] = 'NA'
        model_report['assemblied_protomer_exposed_area'] = 'NA'
        model_report['assemblied_protomer_hydrophobic_area'] = 'NA'
        model_report['assemblied_protomer_conserved_area'] = 'NA'

    if 'G' in g_args.assessment:
        output.append(pctools.subsection('3' + '[G]', 'GESAMT Comparison'))
        qscore, rmsd, fasta_out, gesamt_output = pctools.run_gesamt(
            template, template_file, model_oligomer_name, oligomer, None,
            g_args)
        output.append(gesamt_output)
        model_report['gesamt_qscore'] = str(qscore)
        model_report['gesamt_rmsd'] = str(rmsd)
    else:
        model_report['gesamt_qscore'] = 'NA'
        model_report['gesamt_rmsd'] = 'NA'

    if assess_quality:
        output.append(pctools.subsection('3' + '[M]', 'Molprobity Comparison'))
        model_molprobity, molprobity_output = pctools.run_molprobity(
            oligomer, g_args)
        output.append(molprobity_output)
        model_report['model_clashscore'] = str(model_molprobity['clashscore'])
        model_report['model_molprobity'] = str(
            model_molprobity['molprobity_score'])
        output.append(clrs['y'] + 'MOLPROBITY COMPARISON' + clrs['n'])
        output.append('Criterion\tTempl.\tModel')
        for label, key in (('Rama. Fav.\t', 'rama_fav'),
                           ('Rama. Out.\t', 'rama_out'),
                           ('Rot. Out.\t', 'rot_out'),
                           ('CBeta Dev.\t', 'cb_dev'),
                           ('Clashscore\t', 'clashscore'),
                           ('Molprob. Score\t', 'molprobity_score')):
            output.append(label + str(template_molprobity[key]) + '\t' +
                          str(model_molprobity[key]))
        molprobity_radar, radar_output = plot_molprobity(
            model_oligomer_name, model_molprobity, template,
            template_molprobity)
        output.append(radar_output)
        model_report['molprobity_radar'] = molprobity_radar
        delta_clashscore = (model_molprobity['clashscore'] -
                            template_molprobity['clashscore']) / 10
        output.append('Delta clashscore: ' + str(delta_clashscore))
        # Only a clashscore at least 10 above the template's is penalised.
        if delta_clashscore >= 1:
            model_report['quality_score'] = round(
                10 - math.log(delta_clashscore**5, 10), 2)
        else:
            model_report['quality_score'] = 10
        output.append('Final quality score: ' +
                      str(model_report['quality_score']))
    else:
        model_report['model_clashscore'] = 'NA'
        model_report['model_molprobity'] = 'NA'
        model_report['quality_score'] = 'NA'

    # Overall score: mean of whichever numeric partial scores were computed.
    if assess_quality and assess_interfaces:
        if use_surface:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score'],
                    model_report['quality_score']
                ]) / 3, 2)
        else:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['quality_score']
                ]) / 2, 2)
    elif assess_quality:
        model_report['protchoir_score'] = model_report['quality_score']
    elif assess_interfaces:
        # Requiring ``not allow_monomers`` here as well keeps the 'NA'
        # placeholders set above from being summed (a TypeError before).
        if use_surface:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score']
                ]) / 2, 2)
        else:
            model_report['protchoir_score'] = model_report['interfaces_score']
    else:
        model_report['protchoir_score'] = 'NA'
    if str(model_report['protchoir_score']) == 'NA':
        model_report['score_color'] = 'grey'
    elif model_report['protchoir_score'] <= 5:
        model_report['score_color'] = 'red'
    elif 5 < model_report['protchoir_score'] <= 7:
        model_report['score_color'] = 'orange'
    elif model_report['protchoir_score'] > 7:
        model_report['score_color'] = 'green'

    # Context manager so the pickle file is flushed and closed promptly.
    with open(model_oligomer_name + '_CHOIR_model_report.pickle',
              'wb') as report_file:
        pickle.dump(model_report, report_file)

    return model_report, '\n'.join(output)