Esempio n. 1
0
def restore_chain_identifiers(pdb_file, chains_dict, full_residue_mapping):
    """Write a copy of a structure with restored chain ids and residue numbers.

    Every chain whose id equals a value of ``chains_dict`` is rebuilt under
    the corresponding key, and its residues are renumbered according to
    ``full_residue_mapping``. Chains not referenced by ``chains_dict`` are
    left out of the written structure.

    Args:
        pdb_file: Path handed to ``pctools.parse_any_structure``.
        chains_dict: Mapping of original chain id -> chain id currently used
            in ``pdb_file``.
        full_residue_mapping: Mapping of current chain id to either a
            ``collections.OrderedDict`` (current residue number -> restored
            residue number) or an ``int`` offset added to the current
            residue number.

    Returns:
        Path of the ``<name>_CHOIR_CorrectedChains.pdb`` file written inside
        ``workdir``.
    """
    pname, structure, nchains = pctools.parse_any_structure(pdb_file)
    output_path = os.path.join(workdir, pname + "_CHOIR_CorrectedChains.pdb")
    parsed_chains = bpp.Selection.unfold_entities(structure, 'C')
    rebuilt_model = bpp.Model.Model(0)

    for original, current in chains_dict.items():
        for chain in parsed_chains:
            if chain.id != current:
                continue
            rebuilt_chain = bpp.Chain.Chain(current)
            rebuilt_chain.id = original
            for residue in chain:
                rebuilt_residue = bpp.Residue.Residue(
                    residue.id, residue.get_resname(), residue.get_segid())
                mapping = full_residue_mapping[current]
                mapping_type = type(mapping)
                # Exact type checks on purpose: only OrderedDict and int
                # mappings trigger atom transfer and renumbering. For any
                # other mapping type the residue is added without atoms and
                # with its id unchanged.
                if (mapping_type is collections.OrderedDict
                        or mapping_type is int):
                    for atom in residue:
                        rebuilt_residue.add(atom)
                    if mapping_type is int:
                        restored_number = mapping + residue.id[1]
                    else:
                        restored_number = mapping[residue.id[1]]
                    rebuilt_residue.id = (' ', restored_number, ' ')
                rebuilt_chain.add(rebuilt_residue)
            rebuilt_model.add(rebuilt_chain)

    rebuilt_structure = bpp.Structure.Structure(structure.id)
    rebuilt_structure.add(rebuilt_model)
    io.set_structure(rebuilt_structure)
    io.save(output_path)
    return output_path
Esempio n. 2
0
def make_local_template(best_oligo_template):
    """Save a cleaned local copy of the selected template structure.

    The source file is looked up in ``pdb_archive`` (when
    ``g_args.allow_monomers`` is set) or in ``pdb_homo_archive`` (otherwise),
    inside the sub-directory named after characters 1-2 of the template code.
    The parsed structure is then written to ``workdir`` through the
    ``pctools.SelectIfCA()`` selector.

    Args:
        best_oligo_template: Template code used to build the archive file
            name.

    Returns:
        Path of the written ``<code>_CHOIR_CleanTemplate.pdb`` file.

    Raises:
        RuntimeError: If monomers are allowed and ``pctools.is_nmr`` flags
            the template contents as an NMR structure.
    """
    middle_letters_best = best_oligo_template[1:3]
    if g_args.allow_monomers:
        best_template_file = os.path.join(
            pdb_archive, middle_letters_best,
            'pdb' + best_oligo_template + ".ent.gz")
        _, contents = pctools.parse_pdb_contents(best_template_file)
        if pctools.is_nmr(contents):
            print(
                clrs['r'] + '\n\n Selected template ' + best_oligo_template +
                ' is an NMR structure \n Will try a a different candidate.\n\n'
                + clrs['n'])
            # A bare ``raise`` here had no exception of its own to re-raise:
            # it produced an anonymous RuntimeError, or re-raised whatever
            # unrelated exception a caller was handling. Raise the same
            # exception type explicitly, with a meaningful message.
            raise RuntimeError('Selected template ' + best_oligo_template +
                               ' is an NMR structure')
    else:
        best_template_file = os.path.join(pdb_homo_archive,
                                          middle_letters_best,
                                          best_oligo_template + ".pdb.gz")
    clean_template_file = os.path.join(
        workdir, best_oligo_template + "_CHOIR_CleanTemplate.pdb")
    _, structure, _ = pctools.parse_any_structure(best_template_file)
    io.set_structure(structure)
    io.save(clean_template_file, pctools.SelectIfCA())
    return clean_template_file
Esempio n. 3
0
def extract_relevant_chains(pdb_file, relevant_chains):
    """Write a copy of ``pdb_file`` filtered by ``pctools.SelectChains``.

    Args:
        pdb_file: Path of the structure to filter. The part of its base name
            before ``_CHOIR_`` is reused for the output file name.
        relevant_chains: Value passed to ``pctools.SelectChains`` to decide
            what is written (presumably the chain ids to keep - confirm
            against ``pctools``).

    Returns:
        Path of the written ``<name>_CHOIR_RelevantChains.pdb`` file inside
        ``workdir``.
    """
    template_name = os.path.basename(pdb_file).split('_CHOIR_')[0]
    # Only the parsed structure is needed; the chain list that used to be
    # unfolded here was never read.
    _, structure, _ = pctools.parse_any_structure(pdb_file)
    relevant_chains_file = os.path.join(
        workdir, template_name + "_CHOIR_RelevantChains.pdb")
    io.set_structure(structure)
    io.save(relevant_chains_file, pctools.SelectChains(relevant_chains))

    return relevant_chains_file
Esempio n. 4
0
def rename_relevant_chains(pdb_file):
    """Relabel every chain of ``pdb_file`` using the ``numalpha`` lookup.

    The chain at 1-based position ``k`` receives ``numalpha[str(k)]`` as its
    new id, and the relabelled structure is written to ``workdir``.

    Args:
        pdb_file: Path of the structure to relabel. The part of its base
            name before ``_CHOIR_`` is reused for the output file name.

    Returns:
        A tuple ``(renamed_chains_file, chains_dict)`` where ``chains_dict``
        maps each original chain id to the id assigned here.
    """
    template_name = os.path.basename(pdb_file).split('_CHOIR_')[0]
    pname, structure, nchains = pctools.parse_any_structure(pdb_file)
    renamed_chains_file = os.path.join(
        workdir, template_name + "_CHOIR_RenamedChainsTemplate.pdb")
    chains = bpp.Selection.unfold_entities(structure, 'C')

    chains_dict = {}
    # First pass: assign temporary 'X'-prefixed ids. Presumably this keeps a
    # new id from colliding with an id still carried by a sibling chain -
    # TODO confirm.
    for position, chain in enumerate(chains, start=1):
        previous_id = chain.id
        replacement = numalpha[str(position)]
        chain.id = 'X' + replacement
        chains_dict[previous_id] = replacement
    # Second pass: keep only the character that follows the 'X' prefix.
    for chain in chains:
        chain.id = chain.id[1]

    io.set_structure(structure)
    io.save(renamed_chains_file)

    return renamed_chains_file, chains_dict
Esempio n. 5
0
def analyse_model(oligomer):
    """Assess one oligomeric model and assemble its report entries.

    Depending on the letters present in ``g_args.assessment`` the model is
    scored on interfaces/surface ('I'), compared through GESAMT ('G') and
    checked with Molprobity ('M'). The resulting report dictionary is also
    pickled to ``<model name>_CHOIR_model_report.pickle`` in the current
    working directory.

    Besides ``g_args`` the function reads the module-level names
    ``g_report``, ``g_entropies``, ``g_z_entropies``, ``g_tmdata``,
    ``g_minx``, ``g_maxx``, ``g_interfaces_dict``, ``g_template_hitchain``,
    ``template``, ``template_file``, ``template_molprobity`` and ``clrs``.

    Args:
        oligomer: Path of the model file to analyse.

    Returns:
        A tuple ``(model_report, text)`` where ``model_report`` is a copy of
        ``g_report`` extended with the entries computed here and ``text`` is
        the collected log output joined with newlines.
    """
    output = []
    model_report = g_report.copy()
    model_report['model_filename'] = oligomer
    model_oligomer_name = os.path.basename(oligomer).split(
        "_CHOIR_")[0].replace('.', '_')
    output.append(pctools.subsection('3', model_oligomer_name))
    output.append('Analysing oligomer file: ' + clrs['y'] + oligomer +
                  clrs['n'] + '\n')
    model_report['model_oligomer_name'] = model_oligomer_name
    if g_args.generate_report is True:
        model_report['model_figures'], pymol_output = pctools.pymol_screenshot(
            oligomer, g_args, putty=True)
        output.append(pymol_output)
    pdb_name, structure, nchains = pctools.parse_any_structure(oligomer)
    nchains, seqs, chain_ids = pctools.extract_seqs(structure, 0)
    # First item of every extracted sequence entry (presumably the chain id;
    # confirm against pctools.extract_seqs).
    relevant_chains = [seq[0] for seq in seqs]

    pisa_output, pisa_error, protomer_data = pctools.run_pisa(
        oligomer,
        '',
        g_args.verbosity,
        gen_monomer_data=True,
        gen_oligomer_data=True)
    protomer_surface_residues = pctools.get_areas(protomer_data)
    (model_report['assemblied_protomer_plot'],
     model_report['assemblied_protomer_exposed_area'],
     model_report['assemblied_protomer_hydrophobic_area'],
     model_report['assemblied_protomer_conserved_area'],
     minx, maxx, analysis_output) = pctools.plot_analysis(
         pdb_name,
         protomer_surface_residues,
         g_entropies,
         g_z_entropies,
         g_tmdata,
         g_args,
         minx=g_minx,
         maxx=g_maxx)
    output.append(analysis_output)

    if 'I' in g_args.assessment and not g_args.allow_monomers:
        output.append(
            pctools.subsection('3' + '[I]', 'Interfaces Comparison: ' +
                               model_oligomer_name))
        if g_args.sequence_mode is False and g_args.skip_conservation is False:
            # Percent change of each protomer area once assembled, relative
            # to the protomer value already present in the report.
            model_report['exposed_area_reduction'] = int(
                100 *
                (float(model_report['assemblied_protomer_exposed_area']) -
                 float(model_report['protomer_exposed_area'])) /
                float(model_report['protomer_exposed_area']))
            model_report['hydrophobic_area_reduction'] = int(
                100 *
                (float(model_report['assemblied_protomer_hydrophobic_area']) -
                 float(model_report['protomer_hydrophobic_area'])) /
                float(model_report['protomer_hydrophobic_area']))
            model_report['conserved_area_reduction'] = int(
                100 *
                (float(model_report['assemblied_protomer_conserved_area']) -
                 float(model_report['protomer_conserved_area'])) /
                float(model_report['protomer_conserved_area']))

            if model_report['exposed_area_reduction'] < -5:
                if model_report['hydrophobic_area_reduction'] < 0:
                    hydrophobic_surface_score = 10 * (
                        model_report['hydrophobic_area_reduction'] /
                        model_report['exposed_area_reduction']) / 3
                else:
                    hydrophobic_surface_score = 0
                if hydrophobic_surface_score > 10:
                    hydrophobic_surface_score = 10
                output.append('Hydrophobic surface score: ' +
                              str(hydrophobic_surface_score))
                if model_report['conserved_area_reduction'] < 0:
                    conserved_surface_score = 10 * (
                        model_report['conserved_area_reduction'] /
                        model_report['exposed_area_reduction']) / 3
                else:
                    conserved_surface_score = 0
                if conserved_surface_score > 10:
                    conserved_surface_score = 10
                output.append('Conserved surface score: ' +
                              str(conserved_surface_score))
                model_report['surface_score'] = round(
                    (hydrophobic_surface_score + conserved_surface_score) / 2,
                    2)
            else:
                output.append(clrs['r'] + 'Exposed area reduction too small.' +
                              clrs['n'])
                model_report['surface_score'] = 0
            output.append('Final surface score: ' +
                          str(model_report['surface_score']))
        else:
            model_report['surface_score'] = 'NA'

        model_oligomer = oligomer.split('_CHOIR_CorrectedChains')[0]
        xml_out = model_oligomer + '_CHOIR_PisaInterfaces.xml'
        # NOTE(review): the text returned here is overwritten by the
        # plot_deltas() result below and is never appended to ``output``.
        model_interfaces_list, interfaces_output = pctools.parse_interfaces(
            xml_out, relevant_chains, g_args.verbosity)
        template_interfaces_list = g_interfaces_dict[g_template_hitchain]

        if model_interfaces_list and template_interfaces_list:
            if g_args.verbosity > 0:
                output.append(clrs['y'] + 'MODEL INTERFACES' + clrs['n'])
                for model_interface in model_interfaces_list:
                    output.append(clrs['y'] +
                                  ' <> '.join(model_interface['chains']) +
                                  clrs['n'])
                    output.append(clrs['y'] + 'Interface Area: ' + clrs['n'] +
                                  str(model_interface['interface area']) +
                                  ' A^2')
                    output.append(
                        clrs['y'] + 'Interface Solvation Energy: ' +
                        clrs['n'] +
                        str(model_interface['interface solvation energy']) +
                        ' kcal/mol')
                    output.append(clrs['y'] + 'Hydrogen Bonds: ' + clrs['n'] +
                                  str(model_interface['hydrogen bonds']))
                    output.append(clrs['y'] + 'Salt Bridges: ' + clrs['n'] +
                                  str(model_interface['salt bridges']))
                    output.append(clrs['y'] + 'Disulphide Bridges: ' +
                                  clrs['n'] +
                                  str(model_interface['disulphide bridges']) +
                                  "\n\n")

            interfaces_comparison = {}
            for template_interface in template_interfaces_list:
                for model_interface in model_interfaces_list:
                    # Only interfaces joining the same pair of chains are
                    # compared.
                    if set(model_interface['chains']) == set(
                            template_interface['chains']):
                        comparison_data = {}
                        # Sum of the weights used in the final score
                        # (1 area + 2 energy + 2 hb + 3 sb + 4 ss); a term
                        # absent from both structures has its weight removed.
                        denominator = 12
                        delta_area = round(
                            model_interface['interface area'] -
                            template_interface['interface area'], 2)
                        comparison_data['model area'] = model_interface[
                            'interface area']
                        comparison_data['template area'] = template_interface[
                            'interface area']
                        comparison_data['delta area'] = delta_area
                        model_energy = model_interface[
                            'interface solvation energy']
                        template_energy = template_interface[
                            'interface solvation energy']
                        delta_energy = round(model_energy - template_energy, 2)
                        comparison_data['model energy'] = model_energy
                        comparison_data['template energy'] = template_energy
                        comparison_data['delta energy'] = delta_energy
                        delta_hb = round(
                            model_interface['hydrogen bonds'] -
                            template_interface['hydrogen bonds'], 2)
                        comparison_data['model hb'] = model_interface[
                            'hydrogen bonds']
                        comparison_data['template hb'] = template_interface[
                            'hydrogen bonds']
                        comparison_data['delta hb'] = delta_hb
                        delta_sb = round(
                            model_interface['salt bridges'] -
                            template_interface['salt bridges'], 2)
                        comparison_data['model sb'] = model_interface[
                            'salt bridges']
                        comparison_data['template sb'] = template_interface[
                            'salt bridges']
                        comparison_data['delta sb'] = delta_sb
                        delta_ss = round(
                            model_interface['disulphide bridges'] -
                            template_interface['disulphide bridges'], 2)
                        comparison_data['model ss'] = model_interface[
                            'disulphide bridges']
                        comparison_data['template ss'] = template_interface[
                            'disulphide bridges']
                        comparison_data['delta ss'] = delta_ss

                        output.append(clrs['y'] + 'INTERFACES COMPARISON' +
                                      clrs['n'])
                        output.append(' <> '.join(model_interface['chains']))
                        if delta_area >= 0:
                            emphasis_color = clrs['g']
                            relative_area = 100
                        else:
                            emphasis_color = clrs['r']
                            relative_area = round(
                                model_interface['interface area'] * 100 /
                                template_interface['interface area'], 2)
                        output.append('Delta Interface Area: ' +
                                      emphasis_color + str(delta_area) +
                                      clrs['n'] + ' A^2 (' +
                                      str(relative_area) + '%)')

                        if delta_energy <= 0:
                            emphasis_color = clrs['g']
                            relative_energy = 100
                        else:
                            emphasis_color = clrs['r']
                            if model_energy < 0 and template_energy < 0:
                                relative_energy = round(
                                    model_energy * 100 / template_energy, 2)
                            elif model_energy < 0 and template_energy > 0:
                                relative_energy = 100
                            else:
                                # Model energy is not favourable. This also
                                # covers an energy of exactly 0, which used
                                # to leave relative_energy unset (or stale
                                # from a previous interface).
                                relative_energy = 0
                        output.append('Delta Interface Solvation Energy: ' +
                                      emphasis_color + str(delta_energy) +
                                      clrs['n'] + ' kcal/mol (' +
                                      str(relative_energy) + '%)')

                        if model_interface[
                                'hydrogen bonds'] == template_interface[
                                    'hydrogen bonds'] == 0:
                            relative_hb = 0
                            emphasis_color = clrs['r']
                            denominator -= 2
                        elif delta_hb >= 0:
                            relative_hb = 100
                            emphasis_color = clrs['g']
                        else:
                            emphasis_color = clrs['r']
                            relative_hb = round(
                                model_interface['hydrogen bonds'] * 100 /
                                template_interface['hydrogen bonds'], 2)
                        output.append('Delta Hydrogen Bonds: ' +
                                      emphasis_color + str(delta_hb) +
                                      clrs['n'] + ' (' + str(relative_hb) +
                                      '%)')

                        if model_interface[
                                'salt bridges'] == template_interface[
                                    'salt bridges'] == 0:
                            relative_sb = 0
                            emphasis_color = clrs['r']
                            denominator -= 3
                        elif delta_sb >= 0:
                            relative_sb = 100
                            emphasis_color = clrs['g']
                        else:
                            relative_sb = round(
                                model_interface['salt bridges'] * 100 /
                                template_interface['salt bridges'], 2)
                            emphasis_color = clrs['r']
                        output.append('Delta Salt Bridges: ' + emphasis_color +
                                      str(delta_sb) + clrs['n'] + ' (' +
                                      str(relative_sb) + '%)')

                        if model_interface[
                                'disulphide bridges'] == template_interface[
                                    'disulphide bridges'] == 0:
                            relative_ss = 0
                            emphasis_color = clrs['r']
                            denominator -= 4
                        elif delta_ss >= 0:
                            relative_ss = 100
                            emphasis_color = clrs['g']
                        else:
                            relative_ss = round(
                                model_interface['disulphide bridges'] * 100 /
                                template_interface['disulphide bridges'], 2)
                            emphasis_color = clrs['r']
                        output.append('Delta Disulphide Bridges: ' +
                                      emphasis_color + str(delta_ss) +
                                      clrs['n'] + ' (' + str(relative_ss) +
                                      '%)\n')

                        if denominator == 0:
                            comparison_data['score'] = 0
                        else:
                            comparison_data['score'] = round(
                                (relative_area + 2 * relative_energy +
                                 2 * relative_hb + 3 * relative_sb +
                                 4 * relative_ss) / denominator, 2)
                        output.append('Interface score: ' +
                                      str(comparison_data['score']))
                        interfaces_comparison[''.join(
                            sorted(
                                model_interface['chains']))] = comparison_data

            comparison_plots, interfaces_output = plot_deltas(
                model_oligomer_name, template, interfaces_comparison, g_args)
            model_report['comparison_plots'] = os.path.basename(
                comparison_plots)
            output.append(interfaces_output)
            summed_score = sum(
                data['score'] for data in interfaces_comparison.values())

            if interfaces_comparison:
                # Per-interface scores are percentages; dividing the mean by
                # 10 brings the result to a 0-10 range.
                model_report['interfaces_score'] = round(
                    summed_score / (10 * len(interfaces_comparison)), 2)
            else:
                # No model interface shares its chains with a template
                # interface: score 0 instead of dividing by zero.
                model_report['interfaces_score'] = 0
            output.append('Final interfaces score: ' +
                          str(model_report['interfaces_score']))
        else:
            if 'surface_score' not in model_report:
                model_report['surface_score'] = 0
            model_report['interfaces_score'] = 0

    else:
        model_report['surface_score'] = 'NA'
        model_report['interfaces_score'] = 'NA'
        model_report['comparison_plots'] = 'NA'
        model_report['assemblied_protomer_exposed_area'] = 'NA'
        model_report['assemblied_protomer_hydrophobic_area'] = 'NA'
        model_report['assemblied_protomer_conserved_area'] = 'NA'

    if 'G' in g_args.assessment:
        output.append(pctools.subsection('3' + '[G]', 'GESAMT Comparison'))
        qscore, rmsd, fasta_out, gesamt_output = pctools.run_gesamt(
            template, template_file, model_oligomer_name, oligomer, None,
            g_args)
        output.append(gesamt_output)
        model_report['gesamt_qscore'] = str(qscore)
        model_report['gesamt_rmsd'] = str(rmsd)
    else:
        model_report['gesamt_qscore'] = 'NA'
        model_report['gesamt_rmsd'] = 'NA'

    if 'M' in g_args.assessment:
        output.append(pctools.subsection('3' + '[M]', 'Molprobity Comparison'))
        model_molprobity, molprobity_output = pctools.run_molprobity(
            oligomer, g_args)
        output.append(molprobity_output)
        model_report['model_clashscore'] = str(model_molprobity['clashscore'])
        model_report['model_molprobity'] = str(
            model_molprobity['molprobity_score'])
        output.append(clrs['y'] + 'MOLPROBITY COMPARISON' + clrs['n'])
        output.append('Criterion\tTempl.\tModel')
        output.append('Rama. Fav.\t' + str(template_molprobity['rama_fav']) +
                      '\t' + str(model_molprobity['rama_fav']))
        output.append('Rama. Out.\t' + str(template_molprobity['rama_out']) +
                      '\t' + str(model_molprobity['rama_out']))
        output.append('Rot. Out.\t' + str(template_molprobity['rot_out']) +
                      '\t' + str(model_molprobity['rot_out']))
        output.append('CBeta Dev.\t' + str(template_molprobity['cb_dev']) +
                      '\t' + str(model_molprobity['cb_dev']))
        output.append('Clashscore\t' + str(template_molprobity['clashscore']) +
                      '\t' + str(model_molprobity['clashscore']))
        output.append('Molprob. Score\t' +
                      str(template_molprobity['molprobity_score']) + '\t' +
                      str(model_molprobity['molprobity_score']))
        molprobity_radar, radar_output = plot_molprobity(
            model_oligomer_name, model_molprobity, template,
            template_molprobity)
        output.append(radar_output)
        model_report['molprobity_radar'] = molprobity_radar
        delta_clashscore = (model_molprobity['clashscore'] -
                            template_molprobity['clashscore']) / 10
        output.append('Delta clashscore: ' + str(delta_clashscore))
        # The penalty only applies when the model clashscore exceeds the
        # template clashscore by at least 10 (delta >= 1).
        if delta_clashscore >= 1:
            model_report['quality_score'] = round(
                10 - math.log(delta_clashscore**5, 10), 2)
        else:
            model_report['quality_score'] = 10
        output.append('Final quality score: ' +
                      str(model_report['quality_score']))
    else:
        model_report['model_clashscore'] = 'NA'
        model_report['model_molprobity'] = 'NA'
        model_report['quality_score'] = 'NA'

    if 'M' in g_args.assessment and 'I' in g_args.assessment and not g_args.allow_monomers:
        if g_args.sequence_mode is False and g_args.skip_conservation is False:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score'],
                    model_report['quality_score']
                ]) / 3, 2)
        else:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['quality_score']
                ]) / 2, 2)
    elif 'M' in g_args.assessment:
        model_report['protchoir_score'] = model_report['quality_score']
    elif 'I' in g_args.assessment and not g_args.allow_monomers:
        if g_args.sequence_mode is False and g_args.skip_conservation is False:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score']
                ]) / 2, 2)
        else:
            model_report['protchoir_score'] = model_report['interfaces_score']
    else:
        # Nothing numeric to combine. This includes 'I' requested together
        # with allow_monomers, where both interface scores are 'NA' and
        # summing them used to raise TypeError.
        model_report['protchoir_score'] = 'NA'
    if str(model_report['protchoir_score']) == 'NA':
        model_report['score_color'] = 'grey'
    elif model_report['protchoir_score'] <= 5:
        model_report['score_color'] = 'red'
    elif 5 < model_report['protchoir_score'] <= 7:
        model_report['score_color'] = 'orange'
    elif model_report['protchoir_score'] > 7:
        model_report['score_color'] = 'green'

    # Context manager so the pickle file is flushed and closed
    # deterministically.
    report_pickle = model_oligomer_name + '_CHOIR_model_report.pickle'
    with open(report_pickle, 'wb') as report_handle:
        pickle.dump(model_report, report_handle)

    return model_report, '\n'.join(output)