Python Toolbox Examples

Programming Language: Python

Namespace/Package Name: ProtCHOIR

Class/Type: Toolbox

Examples at hotexamples.com: 19

Python Toolbox - 19 examples found. These are the top rated real world Python examples of ProtCHOIR.Toolbox extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

printv(8)

parse_any_structure(5)

print_section(3)

parse_pdb_contents(2)

extract_seqs(2)

run_molprobity(2)

run_gesamt(2)

pymol_screenshot(2)

FileFormatError(1)

split_states(1)

run_pisa(1)

print_subsection(1)

print_sorry(1)

plot_analysis(1)

parse_interfaces(1)

parse_pdb_structure(1)

SelectChains(1)

is_valid_sequence(1)

is_nmr(1)

html_report(1)

gzip_pdb(1)

get_pairwise_ids(1)

get_areas(1)

get_annotated_states(1)

check_interfaces(1)

SelectIfCA(1)

subsection(1)

Example #1

Show file

def list_new_files(pdb1_archive, assession_log, verbosity):
    '''
    Lists the files of the local pdb1 repository that still have to be
    assessed by the curate_homoDB function.

    The archive is walked recursively and every file whose name ends with
    ".pdb1.gz" is considered. A file is selected when its name is not a key
    of the previous assession log, or when its os.path.getctime() value is
    greater than the time registered for it in that log.

    pdb1_archive: root directory of the local pdb1 (biounit) repository.
    assession_log: dictionary where keys are file names and values are the
        last assession times (anything float() accepts).
    verbosity: forwarded to pctools.printv.

    Returns a list with the full paths of the files to be assessed.
    Raises AssertionError if pdb1_archive is not an existing directory.
    Called by: curate_homoDB()
    '''
    new_files = []
    pctools.printv('Assessing files in PDB1 archive...', verbosity)
    assert os.path.isdir(pdb1_archive), (
        clrs['r'] + '\n\n Not able to find PDB archive.\n\n Does "' +
        pdb1_archive + '" exist?' + clrs['n'])
    # Collect the candidates first so the walk is finished before any file
    # is stat'ed. The bare file name is kept next to the full path instead
    # of being recovered later by splitting on '/', which only works where
    # os.path.join uses '/' as separator.
    candidates = [(os.path.join(dirpath, name), name)
                  for dirpath, _dirnames, filenames in os.walk(pdb1_archive)
                  for name in filenames if name.endswith(".pdb1.gz")]
    for path, name in candidates:
        # NOTE: this is the ctime (metadata change time on Unix), not mtime
        changed = os.path.getctime(path)
        if name not in assession_log or changed > float(assession_log[name]):
            pctools.printv(
                clrs['y'] + path + ' should be assessed' + clrs['n'] + '...\n',
                verbosity)
            new_files.append(path)
    return new_files

Example #2

Show file

def make_local_template(best_oligo_template):
    '''
    Writes a cleaned copy of the selected template into the working directory.

    When g_args.allow_monomers is set, the template is read from pdb_archive
    ('pdb<code>.ent.gz') and rejected if pctools.is_nmr() flags its contents;
    otherwise it is read from pdb_homo_archive ('<code>.pdb.gz'). In both
    cases the file is looked up in the subfolder named after characters 2-3
    of the code. The structure is saved through the module-level io object
    using the pctools.SelectIfCA() selector.

    best_oligo_template: code of the template to be used.

    Returns the path of '<code>_CHOIR_CleanTemplate.pdb' inside workdir.
    Raises RuntimeError if the template is an NMR structure.
    '''
    middle_letters_best = best_oligo_template[1:3]
    if g_args.allow_monomers:
        best_template_file = os.path.join(
            pdb_archive, middle_letters_best,
            'pdb' + best_oligo_template + ".ent.gz")
        _, contents = pctools.parse_pdb_contents(best_template_file)
        if pctools.is_nmr(contents):
            print(
                clrs['r'] + '\n\n Selected template ' + best_oligo_template +
                ' is an NMR structure \n Will try a a different candidate.\n\n'
                + clrs['n'])
            # A bare `raise` with no active exception only surfaces as
            # "RuntimeError: No active exception to reraise"; raise the same
            # exception type explicitly, with a message that says why.
            raise RuntimeError('Selected template ' + best_oligo_template +
                               ' is an NMR structure')
    else:
        best_template_file = os.path.join(pdb_homo_archive,
                                          middle_letters_best,
                                          best_oligo_template + ".pdb.gz")
    clean_template_file = os.path.join(
        workdir, best_oligo_template + "_CHOIR_CleanTemplate.pdb")
    _, structure, _ = pctools.parse_any_structure(best_template_file)
    io.set_structure(structure)
    io.save(clean_template_file, pctools.SelectIfCA())
    return clean_template_file

Example #3

Show file

def analyse_oligomers(input_file,
                      template_hitchain,
                      oligomers_list,
                      interfaces_dict,
                      tmdata,
                      report,
                      args,
                      entropies=None,
                      z_entropies=None,
                      minx=None,
                      maxx=None):
    '''
    Runs analyse_model on every oligomer and collects the resulting reports.

    Most arguments are copied into module-level globals (g_*), together with
    the template name, the template file name and, when 'M' is in
    args.assessment, the result of pctools.run_molprobity on the template.
    presumably analyse_model reads these globals; verify against its body.

    input_file: not used by this function.
    template_hitchain: '<template>:<chain>' string; the part before ':' is
        taken as the template name.
    oligomers_list: items handed one by one to analyse_model.
    args: options object; assessment and multiprocess are read here.

    Returns the list of model reports, in the order of oligomers_list.
    '''
    global g_template_hitchain
    global g_interfaces_dict
    global g_tmdata
    global g_report
    global g_args
    global g_entropies
    global g_z_entropies
    global g_minx
    global g_maxx
    global template
    global template_file
    global template_molprobity
    g_template_hitchain = template_hitchain
    g_interfaces_dict = interfaces_dict
    g_tmdata = tmdata
    g_report = report
    g_args = args
    g_entropies = entropies
    g_z_entropies = z_entropies
    g_minx = minx
    g_maxx = maxx
    pctools.print_section(3, 'OLIGOMER ANALYSIS')
    # Define template for comparisons
    template = template_hitchain.split(':')[0]
    template_file = template + '_CHOIR_RelevantChains.pdb'
    reports = []
    if 'M' in args.assessment:
        template_molprobity, molprobity_output = pctools.run_molprobity(
            template_file, args)
        print(molprobity_output)

    # Run the analysis for all models in parallel
    if args.multiprocess is True:
        pool = Pool()
        try:
            for model_report, output in pool.map_async(
                    analyse_model, oligomers_list).get():
                print(output)
                reports.append(model_report)
        except BaseException:
            # Do not leave worker processes behind when a model fails or the
            # run is interrupted.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()

    else:
        for oligomer in oligomers_list:
            model_report, output = analyse_model(oligomer)
            print(output)
            reports.append(model_report)

    return reports

Example #4

Show file

def extract_relevant_chains(pdb_file, relevant_chains):
    '''
    Saves a copy of pdb_file restricted by pctools.SelectChains.

    pdb_file: structure file; the part of its base name before '_CHOIR_' is
        used as the template name.
    relevant_chains: passed to pctools.SelectChains to build the selector
        used when saving.

    Returns the path of '<template>_CHOIR_RelevantChains.pdb' inside workdir.
    '''
    template_name = os.path.basename(pdb_file).split('_CHOIR_')[0]
    _, structure, _ = pctools.parse_any_structure(pdb_file)
    relevant_chains_file = os.path.join(
        workdir, template_name + "_CHOIR_RelevantChains.pdb")
    # The selector does the filtering at save time, so the chains do not need
    # to be unfolded beforehand.
    io.set_structure(structure)
    io.save(relevant_chains_file, pctools.SelectChains(relevant_chains))

    return relevant_chains_file

Example #5

Show file

def restore_chain_identifiers(pdb_file, chains_dict, full_residue_mapping):
    '''
    Rebuilds the structure in pdb_file with its original chain identifiers
    and residue numbers, and saves it.

    pdb_file: structure file parsed with pctools.parse_any_structure.
    chains_dict: maps each original chain id to the id currently used in
        pdb_file.
    full_residue_mapping: indexed by current chain id. An OrderedDict value
        maps the current residue number to the restored one; an int value is
        an offset added to the current residue number. For any other type
        the residue is added without atoms and keeps its id.

    Returns the path of '<name>_CHOIR_CorrectedChains.pdb' inside workdir.
    '''

    def move_atoms(source, target):
        # Add every atom of the source residue to the target residue
        for atom in source:
            target.add(atom)

    pname, structure, nchains = pctools.parse_any_structure(pdb_file)
    restored_chains_file = os.path.join(workdir,
                                        pname + "_CHOIR_CorrectedChains.pdb")
    chains = bpp.Selection.unfold_entities(structure, 'C')
    rebuilt_structure = bpp.Structure.Structure(structure.id)
    rebuilt_model = bpp.Model.Model(0)
    for original, current in chains_dict.items():
        for chain in chains:
            if chain.id != current:
                continue
            rebuilt_chain = bpp.Chain.Chain(current)
            rebuilt_chain.id = original
            for residue in chain:
                rebuilt_residue = bpp.Residue.Residue(residue.id,
                                                      residue.get_resname(),
                                                      residue.get_segid())
                # Explicit residue-number lookup table
                if type(full_residue_mapping[current]
                        ) is collections.OrderedDict:
                    move_atoms(residue, rebuilt_residue)
                    restored_number = full_residue_mapping[current][
                        residue.id[1]]
                    rebuilt_residue.id = (' ', restored_number, ' ')
                # Constant offset applied to the whole chain
                if type(full_residue_mapping[current]) is int:
                    move_atoms(residue, rebuilt_residue)
                    restored_number = (full_residue_mapping[current] +
                                       residue.id[1])
                    rebuilt_residue.id = (' ', restored_number, ' ')
                rebuilt_chain.add(rebuilt_residue)
            rebuilt_model.add(rebuilt_chain)
    rebuilt_structure.add(rebuilt_model)
    io.set_structure(rebuilt_structure)
    io.save(restored_chains_file)
    return restored_chains_file

Example #6

Show file

def update_seqres(verbosity):
    '''
    Runs wget to update the local seqres database, decompresses it and runs
    makeblastdb.

    The download is attempted up to three times. If every attempt fails the
    function returns without touching the local files, instead of crashing
    on the missing wget output.

    verbosity: forwarded to pctools.printv.
    Called by: update_databases()
    '''
    max_attempts = 3
    seqres_dir = os.path.join(choirdb, 'seqres')
    os.makedirs(seqres_dir, exist_ok=True)
    seqres_txt = os.path.join(seqres_dir, 'pdb_seqres.txt')
    seqres_fasta = os.path.join(seqres_dir, 'seqres.fasta')
    pctools.printv('Fetching pdb_seqres.txt...', verbosity)
    wgetout = None
    for attempt in range(1, max_attempts + 1):
        try:
            wgetout = subprocess.check_output([
                'wget', '-m', '-r', '-nH', '--cut-dirs=3', '--user=anonymous',
                seqres_ftp, '-P', seqres_dir
            ],
                                              stderr=subprocess.STDOUT)
            break
        # Non-zero exit status of wget, or wget could not be started at all.
        # KeyboardInterrupt is deliberately not swallowed.
        except (subprocess.CalledProcessError, OSError):
            if attempt < max_attempts:
                print('Attempt ' + str(attempt) + ' failed, trying again.')
            else:
                print(
                    'Failed to download seqres in 3 attempts. Try again later.'
                )
    if wgetout is None:
        return

    # Text emitted by wget when the remote file is not newer than the local
    # copy (the closing quote is the typographic one used by wget).
    no_wget = 'seqres.txt.gz’ -- not retrieving'
    downloaded = no_wget not in wgetout.decode('UTF-8')

    if downloaded or not os.path.isfile(seqres_txt):
        pctools.printv('Decompressing pdb_seqres.txt...', verbosity)

        with gzip.open(seqres_txt + '.gz',
                       'rb') as fin, open(seqres_fasta, 'wb') as fout:
            shutil.copyfileobj(fin, fout)
    if downloaded or not os.path.isfile(seqres_fasta + '.pal'):
        subprocess.run([
            makeblastdb_exe, '-in', seqres_fasta, '-parse_seqids', '-dbtype',
            'prot', '-blastdb_version', '5', '-out', seqres
        ])

Example #7

Show file

def update_uniref(verbosity):
    '''
    Runs wget to update the local uniref50 database, decompresses it and runs
    makeblastdb.

    The download is attempted up to three times. If every attempt fails the
    function returns without touching the local files, instead of crashing
    on the missing wget output.

    verbosity: forwarded to pctools.printv.
    Called by: update_databases()
    '''
    max_attempts = 3
    uniref50_fasta = os.path.join(choirdb, 'uniref50/uniref50.fasta')
    pctools.printv('Fetching uniref50.fasta...', verbosity)
    wgetout = None
    for attempt in range(1, max_attempts + 1):
        try:
            wgetout = subprocess.check_output([
                'wget', '-m', '-r', '-nH', '--cut-dirs=4', '--user=anonymous',
                uniref50_ftp, '-P', choirdb
            ],
                                              stderr=subprocess.STDOUT)
            break
        # Non-zero exit status of wget, or wget could not be started at all.
        # KeyboardInterrupt is deliberately not swallowed.
        except (subprocess.CalledProcessError, OSError):
            if attempt < max_attempts:
                print('Attempt ' + str(attempt) + ' failed, trying again.')
            else:
                print(
                    'Failed to download UniRef50 in 3 attempts. Try again later.'
                )
    if wgetout is None:
        return

    # Text emitted by wget when the remote file is not newer than the local
    # copy (the closing quote is the typographic one used by wget).
    no_wget = 'uniref50.fasta.gz’ -- not retrieving'
    downloaded = no_wget not in wgetout.decode('UTF-8')

    if downloaded or not os.path.isfile(uniref50_fasta):
        pctools.printv('Decompressing uniref50.fasta...', verbosity)

        with gzip.open(uniref50_fasta + '.gz',
                       'rb') as fin, open(uniref50_fasta, 'wb') as fout:
            shutil.copyfileobj(fin, fout)
    if downloaded or not os.path.isfile(uniref50_fasta + '.pal'):
        subprocess.run([
            makeblastdb_exe, '-in', uniref50_fasta, '-parse_seqids', '-dbtype',
            'prot', '-out', uniref50
        ])

Example #8

Show file

def score_pairwise(seq1, seq2, matrix, gap_s, gap_e):
    '''
    Scores two aligned sequences position by position, globally and per
    30-residue segment.

    A position where both sequences hold '-' scores 4 when no gap is open.
    A position that opens a gap scores gap_s and every following position
    still containing '-' scores gap_e. All other positions are scored with
    score_match(pair, matrix). The gap state is carried across segments.

    seq1, seq2: aligned sequences; seq2 must be at least as long as seq1,
        otherwise IndexError is raised.
    matrix: passed through to score_match.
    gap_s, gap_e: gap opening and gap extension scores.

    Returns (score, wscores): the total score and the list of segment scores.
    '''
    window_size = 30
    seq_length = len(seq1)
    nwindows = -(-seq_length // window_size)  # ceiling division
    pctools.printv('Number of 30-residue segments: ' + str(nwindows),
                   g_args.verbosity)
    score = 0
    wscores = []
    gap = False
    for ipos in range(0, seq_length, window_size):
        # The last segment may be shorter than the window
        fpos = min(ipos + window_size, seq_length)
        pctools.printv(
            str(ipos + 1) + ' ' + seq1[ipos:fpos] + ' ' + str(fpos),
            g_args.verbosity)
        pctools.printv(
            str(ipos + 1) + ' ' + seq2[ipos:fpos] + ' ' + str(fpos),
            g_args.verbosity)
        wscore = 0
        for i in range(ipos, fpos):
            pair = (seq1[i], seq2[i])
            # Work out the contribution of this position once and add it to
            # both the global and the segment score.
            if gap:
                if '-' in pair:
                    contribution = gap_e
                else:
                    gap = False
                    contribution = score_match(pair, matrix)
            elif pair == ('-', '-'):
                contribution = 4
            elif '-' in pair:
                gap = True
                contribution = gap_s
            else:
                contribution = score_match(pair, matrix)
            score += contribution
            wscore += contribution

        pctools.printv('Segment score: ' + str(wscore), g_args.verbosity)
        wscores.append(wscore)

    return score, wscores

Example #9

Show file

def record_fasta(pdb_code, seqs, chain_ids, subfolder, type=None):
    '''
    Writes the valid sequences of an entry to
    pdb_homo_archive/<subfolder>/<type>_sequences/<pdb_code>.fasta.

    pdb_code: entry code, used for the file name and the fasta headers.
    seqs: sequence records; element [1] of each record is the sequence
        string. Records rejected by pctools.is_valid_sequence are skipped.
    chain_ids: chain identifiers, paired with seqs in order.
    subfolder: subdirectory of pdb_homo_archive where the entry belongs.
    type: label of the sequence set, used to name the '<type>_sequences'
        folder. It is required, even though the signature has a default.

    Raises TypeError if type is not given.
    '''
    if type is None:
        # Fail early and clearly, before any directory is created
        raise TypeError(
            "record_fasta() requires 'type' to name the sequences folder")
    type_folder = os.path.join(pdb_homo_archive, subfolder,
                               type + '_sequences')
    # Creates the subfolder as well, and tolerates concurrent creation
    os.makedirs(type_folder, exist_ok=True)
    fasta_file = os.path.join(type_folder, pdb_code + ".fasta")
    with open(fasta_file, 'w') as f:
        for seq, chain_id in zip(seqs, chain_ids):
            if pctools.is_valid_sequence(seq[1]):
                wrapped_seq = "\n".join(tw.wrap(seq[1]))
                fasta_entry = '>' + pdb_code + ':' + str(
                    chain_id) + '\n' + wrapped_seq + '\n\n'
                f.write(fasta_entry)

Example #10

Show file

def rename_relevant_chains(pdb_file):
    '''
    Renames the chains of pdb_file following the numalpha table, in the order
    in which the chains are unfolded, and saves the result.

    Returns a tuple with the path of '<template>_CHOIR_RenamedChainsTemplate.pdb'
    inside workdir and a dictionary mapping each original chain id to the new
    one taken from numalpha.
    '''
    pname, structure, nchains = pctools.parse_any_structure(pdb_file)
    template_name = os.path.basename(pdb_file).split('_CHOIR_')[0]
    renamed_chains_file = os.path.join(
        workdir, template_name + "_CHOIR_RenamedChainsTemplate.pdb")
    all_chains = bpp.Selection.unfold_entities(structure, 'C')
    chains_dict = {}
    # First pass: give every chain a temporary 'X'-prefixed id
    for position, chain in enumerate(all_chains, start=1):
        previous_id = chain.id
        replacement = numalpha[str(position)]
        chain.id = 'X' + replacement
        chains_dict[previous_id] = replacement
    # Second pass: keep only the character that follows the 'X' prefix
    for chain in all_chains:
        chain.id = chain.id[1]
    io.set_structure(structure)
    io.save(renamed_chains_file)

    return renamed_chains_file, chains_dict

Example #11

Show file

def analyse_largest_complexes(item):
    '''
    Runs gesamt for every candidate chain of a template and averages the
    resulting Q-scores.

    item: tuple (hitchain, chains), where hitchain is a '<template>:<chain>'
        string and chains is an iterable of chains handed one by one to
        pctools.run_gesamt.

    Returns a tuple (hitchain, average_qscore, output), where output is the
    gesamt output of all chains plus a summary, joined by newlines. The
    average is 0.0 when chains is empty.
    '''
    output = []
    hitchain, chains = item
    # The unpacking also checks that hitchain has exactly two ':' fields
    template, _hit_chain = hitchain.split(':')
    middle_letters = template[1:3]
    template_file = os.path.join(pdb_homo_archive, middle_letters,
                                 template + ".pdb.gz")
    sum_qscore = 0
    chain_n = 0
    for chain in chains:
        chain_n += 1
        qscore, _rmsd, _fasta_out, gesamt_output = pctools.run_gesamt(
            template, template_file, input_name, g_input_file, chain, g_args)
        sum_qscore += float(qscore)
        output.append(gesamt_output)

    # Avoid a ZeroDivisionError when there are no candidate chains
    average_qscore = sum_qscore / chain_n if chain_n else 0.0
    output.append('--\n\nAverage Q-Score for all candidate chains is ' +
                  clrs['c'] + str(average_qscore) + clrs['n'] + '\n')
    output.append(
        '-------------------------------------------------------------------\n'
    )

    return hitchain, average_qscore, '\n'.join(output)

Example #12

Show file

def curate_homoDB(verbosity):
    '''
    Creates the homo-oligomeric database from a local pdb repository.
    The divided scheme adopted by RCSB, in which the subdirectories
    are the two middle characters in the PDB code, is assumed.

    Every run writes timestamped files to the 'stats' folder of
    pdb_homo_archive:
    * <timestamp>-choirdb.dat holds one "<file name> <assession time>" line
      per assessed file.
    * <timestamp>-choirdb.log holds one summary line per structure stored in
      the database (Code, Chains, Author, Software, Date).
    * <timestamp>-choirdb.err lists the files whose assessment raised an
      exception, or a success message when there were none.
    Sequences are recorded through record_fasta() and the chain
    correspondences returned by pctools.split_states() are kept in
    chain_correspondences.pickle.

    verbosity: forwarded to pctools.printv and list_new_files.
    Called by: update_databases()
    '''
    # Create stats folder if does not exist
    stats_dir = os.path.join(pdb_homo_archive, 'stats')
    if not os.path.isdir(stats_dir):
        os.mkdir(stats_dir)
    # Compare latest assession with new files
    assession_log = read_latest_assession(stats_dir)
    new_files = list_new_files(pdb1_archive, assession_log, verbosity)
    print(clrs['g'] + str(len(new_files)) + clrs['n'] +
          ' new structure files were found and will be processed')
    now = str(time.strftime("%d-%m-%Y@%H.%M.%S"))
    dat_file = os.path.join(stats_dir, now + '-choirdb.dat')
    log_file = os.path.join(stats_dir, now + '-choirdb.log')
    err_file = os.path.join(stats_dir, now + '-choirdb.err')
    if not os.path.isfile(dat_file):
        with open(dat_file, 'w+'):
            pass
    # Write files not to be updated to new dat file
    # NOTE(review): list_new_files looks names up in assession_log by bare
    # file name but returns joined paths, so this membership test looks like
    # it can never match - confirm against read_latest_assession.
    with open(dat_file, 'a') as f:
        for i in assession_log:
            if i not in new_files:
                f.write(i + " " + assession_log[i] + "\n")
    # Create log file
    if not os.path.isfile(log_file):
        with open(log_file, 'w+') as f:
            f.write('Code, Chains, Author, Software, Date\n')

    # Read Chain correspondences
    chain_correspondences_file = os.path.join(stats_dir,
                                              'chain_correspondences.pickle')
    if os.path.isfile(chain_correspondences_file):
        with open(chain_correspondences_file, 'rb') as p:
            chain_correspondences = pickle.load(p)
    else:
        chain_correspondences = {}

    # Main loop that will populate the ProtCHOIR database
    for pdb in pg(new_files, widgets=widgets):
        # The last two '/'-separated path components are the file name and
        # the subfolder holding it
        filename = pdb.split('/')[-1]
        subfolder = pdb.split('/')[-2]
        # Record assessment in dat file
        with open(dat_file, 'a') as f:
            f.write(filename + " " + str(time.time()) + '\n')
        # Start assession
        pctools.printv('\nAssessing ' + pdb + '...', verbosity)
        # Files larger than 2 MiB are not assessed structurally; only their
        # sequences are recorded
        file_size = os.stat(pdb).st_size / 1048576
        pctools.printv(
            'File size: ' + clrs['c'] + '{0:.1g}'.format(file_size) + ' Mb' +
            clrs['n'], verbosity)
        if file_size > 2:
            pctools.printv(clrs['r'] + "File size too large!" + clrs['n'],
                           verbosity)
            pctools.printv(
                clrs['y'] +
                "Will try to fetch sequences from asymmetric unit." +
                clrs['n'], verbosity)
            try:
                # Use the 'pdb<code>.ent.gz' file of pdb_archive instead of
                # the biounit file
                alternative_pdb = os.path.join(
                    pdb_archive, subfolder,
                    'pdb' + filename.split('.')[0] + '.ent.gz')
                pdb_code, structure, nchains = pctools.parse_pdb_structure(
                    alternative_pdb)
                structure, chain_correspondences[
                    pdb_code] = pctools.split_states(structure)
                nchainspostsplit, seqs, chain_ids = pctools.extract_seqs(
                    structure, 0)
                # Write in fasta file
                pctools.printv(
                    clrs['y'] + "Recording large-pdb sequence" + clrs['n'],
                    verbosity)
                record_fasta(pdb_code,
                             seqs,
                             chain_ids,
                             subfolder,
                             type='largepdb')
            # Best effort: any failure is only reported (the file is not
            # added to the err file)
            except:
                pctools.printv(
                    clrs['r'] + "Failed to fetch sequence!" + clrs['n'],
                    verbosity)
            continue

        try:
            pdb_code, structure, nchains = pctools.parse_pdb_structure(pdb)
            pctools.printv(
                'Number of chains in structure ' + clrs['y'] + pdb_code +
                clrs['n'] + ': ' + str(nchains), verbosity)
            # Reject structures with more than 60 chains
            if int(nchains) > 60:
                pctools.printv(
                    "Number of chains (" + clrs['y'] + str(nchains) +
                    clrs['n'] + ") larger than 60! " + clrs['r'] +
                    "Too many chains!" + clrs['n'], verbosity)
                pctools.printv(
                    clrs['y'] + "Will try to fetch sequences anyway." +
                    clrs['n'], verbosity)
                try:
                    # The same file is parsed a second time here
                    pdb_code, structure, nchains = pctools.parse_pdb_structure(
                        pdb)
                    structure, chain_correspondences[
                        pdb_code] = pctools.split_states(structure)
                    nchainspostsplit, seqs, chain_ids = pctools.extract_seqs(
                        structure, 0)
                    pctools.printv(
                        clrs['y'] + "Recording large-pdb sequence" + clrs['n'],
                        verbosity)
                    # Write in fasta file
                    record_fasta(pdb_code,
                                 seqs,
                                 chain_ids,
                                 subfolder,
                                 type='largepdb')
                except:
                    pctools.printv(
                        clrs['r'] + "Failed to fetch sequence!" + clrs['n'],
                        verbosity)
                continue

            structure, chain_correspondences[pdb_code] = pctools.split_states(
                structure)
            nchainspostsplit, seqs, chain_ids = pctools.extract_seqs(
                structure, 0)
            pctools.printv(
                'Number of chains (' + clrs['c'] + str(nchains) + clrs['n'] +
                ') and file size (' + clrs['c'] + str(file_size) + clrs['n'] +
                ') OK.' + clrs['g'] + ' Proceeding.' + clrs['n'] + '\n',
                verbosity)
            # Try to get info from the canonical pdb header (the
            # 'pdb<code>.ent.gz' file with the same code as the pdb1 file)
            canonpdb = "pdb" + pdb_code + ".ent.gz"
            try:
                contents = pctools.parse_pdb_contents(
                    os.path.join(pdb_archive, subfolder, canonpdb))[1]
            except:
                pctools.printv(
                    clrs['r'] +
                    '\n\n Mismatch between pdb and biounit entries...' +
                    clrs['n'], verbosity)
            # NOTE(review): when the parsing above fails, 'contents' is either
            # unbound (NameError, handled by the outer except) or still holds
            # the value from a previous iteration - confirm this is intended.
            author, software = pctools.get_annotated_states(contents)
            pctools.printv(
                'Author determined biological unit = ' + str(author),
                verbosity)
            pctools.printv(
                'Software determined quaternary structure= ' + str(software),
                verbosity)
            # Start assessing sequences and structures (from 2 up to 60 chains)
            if 1 < int(nchains) < 61:
                # Each element of ids is indexed below as
                # (identity percentage, chain, chain)
                ids, proteinpair = pctools.get_pairwise_ids(seqs, nchains)
                for id in ids:
                    if id[0] >= 90:
                        color = clrs['g']
                    else:
                        color = clrs['r']
                    pctools.printv(
                        'Identity between chains ' + clrs['y'] + str(id[1]) +
                        clrs['n'] + ' and ' + clrs['y'] + str(id[2]) +
                        clrs['n'] + ' is ' + color + str(id[0]) + "%" +
                        clrs['n'] + ".", verbosity)
                # Save records for pure homo-oligomers
                # (identities must be strictly above 90 here, while the
                # colouring above uses >= 90)
                if all(id[0] > 90 for id in ids) and proteinpair is True:
                    pctools.printv(
                        "All identities over 90%. Likely " + clrs['b'] +
                        "h**o-oligomeric" + clrs['n'] + ".", verbosity)
                    pctools.printv(clrs['y'] + "FETCHING" + clrs['n'] + ".\n",
                                   verbosity)
                    # Write file to database
                    newfile = os.path.join(pdb_homo_archive, subfolder,
                                           pdb_code + ".pdb")
                    if not os.path.isdir(
                            os.path.join(pdb_homo_archive, subfolder)):
                        os.mkdir(os.path.join(pdb_homo_archive, subfolder))
                    io.set_structure(structure)
                    io.save(newfile)
                    pctools.gzip_pdb(newfile)
                    # Write to log file
                    with open(log_file, 'a') as f:
                        f.write(
                            str(pdb_code) + "," + str(nchains) + "," +
                            '/'.join(author) + "," + '/'.join(software) + "," +
                            str(os.path.getctime(newfile + '.gz')) + '\n')
                    # Write in fasta file
                    pctools.printv(
                        clrs['y'] + "Recording h**o-oligomer sequence." +
                        clrs['n'], verbosity)
                    # NOTE(review): the 'h**o' label (also in the messages)
                    # looks like a censored 'homo' - confirm against the
                    # upstream project before relying on the folder name.
                    record_fasta(pdb_code,
                                 seqs,
                                 chain_ids,
                                 subfolder,
                                 type='h**o')

                # Investigate partial homo-oligomers
                elif any(id[0] > 90 for id in ids) and proteinpair is True:
                    at_least_one_interface = False
                    for id in ids:
                        if id[0] > 90:
                            # Check if similar chains share interfaces
                            if pctools.check_interfaces(
                                    structure, id[1], id[2]):
                                at_least_one_interface = True
                                pctools.printv(
                                    'Contacts found between chains ' +
                                    clrs['g'] + str(id[1]) + clrs['n'] +
                                    ' and ' + clrs['g'] + str(id[2]) +
                                    clrs['n'] + ' sharing ' + clrs['g'] +
                                    str(id[0]) + clrs['n'] + " % identity.",
                                    verbosity)
                                pctools.printv(
                                    "At least one putative " + clrs['b'] +
                                    "h**o-oligomeric " + clrs['n'] +
                                    "interface found.", verbosity)
                                pctools.printv(
                                    clrs['y'] + "FETCHING" + clrs['n'] + ".\n",
                                    verbosity)
                                # Write file to database
                                newfile = os.path.join(pdb_homo_archive,
                                                       subfolder,
                                                       pdb_code + ".pdb")
                                if not os.path.isdir(
                                        os.path.join(pdb_homo_archive,
                                                     subfolder)):
                                    os.mkdir(
                                        os.path.join(pdb_homo_archive,
                                                     subfolder))
                                io.set_structure(structure)
                                io.save(newfile)
                                pctools.gzip_pdb(newfile)
                                # Write to log file
                                with open(log_file, 'a') as f:
                                    f.write(
                                        str(pdb_code) + "," + str(nchains) +
                                        "," + '/'.join(author) + "," +
                                        '/'.join(software) + "," +
                                        str(os.path.getctime(newfile +
                                                             '.gz')) + '\n')
                                # Write in fasta file
                                pctools.printv(
                                    clrs['y'] +
                                    "Recording h**o-oligomer sequence." +
                                    clrs['n'], verbosity)
                                record_fasta(pdb_code,
                                             seqs,
                                             chain_ids,
                                             subfolder,
                                             type='h**o')

                                # One interface is enough to store the entry
                                break
                    if at_least_one_interface is False:
                        pctools.printv(
                            "No h**o-oligomeric interface found. Likely " +
                            clrs['r'] + "hetero-oligomeric" + clrs['n'] + ".",
                            verbosity)
                        pctools.printv(
                            clrs['y'] + "Recording hetero-oligomer sequence" +
                            clrs['n'], verbosity)
                        # Write in fasta file
                        record_fasta(pdb_code,
                                     seqs,
                                     chain_ids,
                                     subfolder,
                                     type='hetero')

                elif proteinpair is False:
                    pctools.printv(
                        clrs['r'] + "No proteic chain pairs found" +
                        clrs['n'] + ".", verbosity)
                    # A sequence made only of 'X' is not counted as a protein
                    # sequence
                    if any([set(seq[1]) != {'X'} for seq in seqs]):
                        pctools.printv(
                            clrs['y'] + "Protein sequences found though" +
                            clrs['n'], verbosity)
                        pctools.printv(
                            clrs['y'] + "Recording hetero-oligomer sequence" +
                            clrs['n'], verbosity)
                        # Write in fasta file
                        record_fasta(pdb_code,
                                     seqs,
                                     chain_ids,
                                     subfolder,
                                     type='hetero')
                    else:
                        pctools.printv(
                            clrs['r'] +
                            "Not even a single protein chain. Disregarding." +
                            clrs['n'], verbosity)

                else:
                    pctools.printv(
                        "No similar chains found. Likely " + clrs['r'] +
                        "hetero-oligomeric" + clrs['n'] + ".", verbosity)
                    pctools.printv(
                        clrs['y'] + "Recording hetero-oligomer sequence" +
                        clrs['n'], verbosity)
                    record_fasta(pdb_code,
                                 seqs,
                                 chain_ids,
                                 subfolder,
                                 type='hetero')

            elif int(nchains) == 1:
                pctools.printv(
                    "Only one chain found. Likely " + clrs['r'] + "monomeric" +
                    clrs['n'] + ".", verbosity)
                pctools.printv(
                    clrs['y'] + "Recording monomer sequence." + clrs['n'],
                    verbosity)
                # split_states is applied again to the structure that was
                # already split above
                structure, chain_correspondences[
                    pdb_code] = pctools.split_states(structure)
                nchains, seqs, chain_ids = pctools.extract_seqs(structure, 0)
                record_fasta(pdb_code, seqs, chain_ids, subfolder, type='mono')

        # Any failure while assessing a file is reported, registered in the
        # err file and the loop moves on to the next file
        except:
            errtype, errvalue, errtraceback = sys.exc_info()
            errtypeshort = str(errtype).split('\'')[1]
            pctools.printv(
                clrs['r'] + '*' + str(errtypeshort) + ': ' + str(errvalue) +
                ' l.' + str(errtraceback.tb_lineno) + '*' + clrs['n'],
                verbosity)
            traceback.print_exception(*sys.exc_info())
            # The bare except also catches Ctrl-C, so stop explicitly
            if errtypeshort == 'KeyboardInterrupt':
                quit()
            #pctools.printv(clrs['r']+"UNKNOWN FAULT"+clrs['n']+".", verbosity)
            if not os.path.isfile(err_file):
                with open(err_file, 'w+') as f:
                    pass
            with open(err_file, 'a') as f:
                f.write(filename + '\n')
            continue

    # Persist the chain correspondences gathered in this and previous runs
    with open(chain_correspondences_file, 'wb') as p:
        pickle.dump(chain_correspondences, p, protocol=pickle.HIGHEST_PROTOCOL)

    # The err file only exists at this point if some assessment failed
    if not os.path.isfile(err_file):
        with open(err_file, 'w+') as f:
            f.write('\nNo errors. Assessment terminated succesfully.\n')

Example #13

Show file

def main():
    """Entry point: either update the local databases or run ProtCHOIR.

    Options are read from the module-level ``initial_args``.

    * With ``args.update`` set, the database locations are printed and, after
      a positive confirmation on stdin, ``update_databases`` is called. A
      negative answer terminates the interpreter.
    * Otherwise the runtime options and input file are validated, the input
      is normalised (dots/dashes removed from the base name), a placeholder
      summary is written, and the pipeline ``analyze_protomer`` ->
      ``make_oligomer`` -> ``analyse_oligomers`` -> ``finalize`` is executed
      in the current working directory.

    Raises:
        AssertionError: if a directory, option or the input file is invalid.
        pctools.FileFormatError: if the input is neither ``.pdb`` nor
            ``.fasta``.
        SystemExit: with status 0 after ``finalize`` when no template or no
            model could be produced.
    """
    args = initial_args

    # Define multiprocessing options
    args.available_cores = cpu_count()

    if args.force_single_core is True:
        args.multiprocess = False
        args.psiblast_threads = 1
        args.modeller_threads = 1
    else:
        if args.psiblast_threads is None:
            args.psiblast_threads = args.available_cores
        if args.modeller_threads is None:
            args.modeller_threads = min([args.available_cores, args.models])

    if args.update is True:
        print(
            tw.dedent("""
                                         !WARNING!

                      You have chosen to updtate the local databases.

              ** The root directory for the database files is: """ +
                      clrs['y'] + choirdb + clrs['n'] + """

              ** The path to local pdb mirror is: """ + clrs['y'] +
                      pdb_archive + clrs['n'] + """

              ** The path to local pdb biounit mirror is: """ + clrs['y'] +
                      pdb1_archive + clrs['n'] + """

              ** The path to local gesamt archive is: """ + clrs['y'] +
                      ges_homo_archive + clrs['n'] + """

              ** The path to local UniRef50 blast database is: """ +
                      clrs['y'] + uniref50 + clrs['n'] + """


              This could take a long time.

              <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

              """))
        option = input('Do you confirm the information above? (y/n)')
        if option in ('y', 'Y', 'YES', 'yes', 'Yes'):
            update_databases(args.verbosity)
            print('\n\nDone updating all databases. Exiting.\n')
        else:
            print('\n\nNo positive confirmation, will not update databases.\n')
            sys.exit()
        return

    # Actually run oligomerization protocol
    outdir = os.getcwd()
    input_file = args.input_file

    # Validate environment and runtime options before doing any work
    assert os.path.isdir(pdb_archive), (
        clrs['r'] + '\n\n Not able to find PDB directory.\n\n Does "' +
        pdb_archive + '" exist?' + clrs['n'])
    assert os.path.isdir(pdb1_archive), (
        clrs['r'] +
        '\n\n Not able to find PDB1 assemblies directory.\n\n Does "' +
        pdb1_archive + '" exist?' + clrs['n'])
    assert os.path.isdir(pdb_homo_archive), (
        clrs['r'] +
        '\n\n Not able to find ProtCHOIR database directory.\n\n Does "' +
        pdb_homo_archive + '" exist?' + clrs['n'])
    assert os.path.isdir(ges_homo_archive), (
        clrs['r'] +
        '\n\n Not able to find GESAMT archive directory.\n\n Does "' +
        ges_homo_archive + '" exist?' + clrs['n'])
    assert args.refine_level in [0, 1, 2, 3, 4], (
        clrs['r'] +
        '\n\n Refinement level must be an integer number from 0 to 4.\n Run ProtCHOIR -h for more information\n\n'
        + clrs['n'])
    assert args.psiblast_params in psiblast_params, (
        clrs['r'] +
        '\n\n PSI-BLAST parameters invalid.\n Run ProtCHOIR -h for more information\n\n'
        + clrs['n'])
    assert input_file is not None, (
        clrs['r'] +
        '\n\n Please inform the input file name.\n Run ProtCHOIR -h for more information.\n\n'
        + clrs['n'])
    assert os.path.isfile(input_file), (
        clrs['r'] + '\n\n Not able to find input file.\n\n Does "' +
        input_file + '" exist?\n' + clrs['n'])
    assert args.zip_output in [0, 1, 2], (
        clrs['r'] +
        '\n\n Compression level must be an integer number between 0 and 2.\n Run ProtCHOIR -h for more information\n\n'
        + clrs['n'])
    assert set(args.assessment) <= set('MIG') or args.assessment == 'N', (
        clrs['r'] +
        '\n\n Oligomer assessment type do not comply.\n Choose any combination of [G]Gesamt, [M]Molprobity, [I]Interfaces or choose [N] for None\n\n'
        + clrs['n'])

    # Force generation of topologies and all assessments if final report is requested
    if args.generate_report is True:
        args.assessment = 'MIG'
        args.plot_topologies = True

    # Deal with dots and dashes in the input file and remove dots
    if input_file.lower().endswith('.pdb'):
        input_basename = os.path.basename(input_file).split('.pdb')[0]
        input_basename = input_basename.replace(".", "_")
        input_basename = input_basename.replace("-", "_")
        new_input_file = input_basename + '.pdb'
        # NOTE(review): only base names are compared, so an input living in
        # another directory whose name is already "clean" is not copied into
        # the working directory - confirm this is intended.
        if os.path.basename(input_file) != os.path.basename(new_input_file):
            shutil.copy(input_file, new_input_file)

    # Also process filename to fasta header if input file is fasta
    elif input_file.lower().endswith('.fasta'):
        input_basename = os.path.basename(input_file).split('.fasta')[0]
        input_basename = input_basename.replace(".", "_")
        input_basename = input_basename.replace("-", "_")
        new_input_file = os.path.join(
            outdir, input_basename + '_CHOIR_MonomerSequence.fasta')
        with open(input_file, 'r') as infile, open(new_input_file,
                                                   'w') as outfile:
            outfile.write('>' + input_basename + '\n')
            # Copy only the first record: stop at the second header line
            headers_seen = 0
            for line in infile:
                if not line.startswith('>'):
                    outfile.write(line)
                else:
                    headers_seen += 1
                if headers_seen == 2:
                    break
        args.sequence_mode = True
    else:
        raise pctools.FileFormatError(
            clrs['r'] +
            '\n\n Input format must be either pdb or fasta\n Run ./ProtCHOIR -h for more information\n\n'
            + clrs['n'])
    if args.allow_monomers:
        assert args.sequence_mode is True, (
            clrs['r'] +
            '\n\n To allow building monomers you must use sequence mode. \n Run ProtCHOIR -h for more information\n\n'
            + clrs['n'])

    # Start recording job progress
    with open('CHOIR_Progress.out', 'w') as f:
        f.write("Starting new ProtCHOIR run\n")

    # Pickle Runtime arguments (context manager guarantees the file is
    # flushed and closed before the pipeline starts)
    with open('CHOIR_Args.pickle', 'wb') as args_pickle:
        pickle.dump(args, args_pickle)

    # Show arguments used and create CHOIR.conf
    pctools.print_section(0, "Runtime Arguments")
    runtime_arguments = {}
    choir_args = os.path.join(outdir, "CHOIR.args")
    with open(choir_args, 'w') as f:
        for name, value in vars(args).items():
            runtime_arguments[name] = value
            print(name + "=" + str(value))
            f.write(name + "=" + str(value) + "\n")
    print('\nRuntime parameters written to: ' + clrs['g'] +
          os.path.basename(choir_args) + clrs['n'] + '\n')

    # Initialize report
    report = {}
    report['runtime_arguments'] = runtime_arguments
    report['input_filename'] = os.path.basename(new_input_file)

    # Write errorprof placeholder summary
    placeholder_report = report.copy()
    report_data = [
        'input_filename', 'sequence_mode', 'templatedmodel',
        'protomer_residues', 'tmspans', 'highest_scoring_state',
        'homo_oligomeric_over_other_score', 'best_template',
        'best_nchains', 'best_id', 'best_cov', 'best_qscore',
        'model_oligomer_name', 'model_molprobity', 'gesamt_rmsd',
        'protchoir_score', 'surface_score', 'interfaces_score',
        'quality_score', 'total_runtime', 'exit'
    ]
    for data in report_data:
        placeholder_report.setdefault(data, 'NA')
    with open(input_basename + '_CHOIR_Summary.tsv', 'w') as f:
        f.write(
            'Input\tSeq.Mode\tTemplated\tLength\tTMSpans\tLikelyState\tH3OScore\tTemplate\tChains\tIdentity\tCoverage\tAv.QScore\tBestModel\tMolprobity\tRMSD\tProtCHOIR\tSurface\tInterfaces\tQuality\tRuntime\tExit\n'
        )
        f.write('\t'.join(
            [str(placeholder_report[data])
             for data in report_data]) + '\n')

    # Start analysis of protomer
    analyse_protomer_results, report, args = analyze_protomer(
        new_input_file, report, args)

    # If no suitable homo-oligomeric template was found, exit nicely.
    if analyse_protomer_results is None:
        finalize(report, input_basename, start_time, start_timestamp, args)
        pctools.print_sorry()
        sys.exit(0)

    # Else, proceed conditionally on runtime arguments. The shape of the
    # result tuple depends on both sequence_mode and skip_conservation.
    if args.sequence_mode is True:
        residue_index_mapping = None
        minx = None
        maxx = None
        if args.skip_conservation:
            entropies = None
            z_entropies = None
            (pdb_name, clean_input_file, largest_oligo_complexes,
             interfaces_dict, tmdata) = analyse_protomer_results
        else:
            (pdb_name, clean_input_file, largest_oligo_complexes,
             interfaces_dict, entropies, z_entropies,
             tmdata) = analyse_protomer_results
            # Conservation analysis yielded nothing: behave as if skipped
            if all(value is None
                   for value in (entropies, z_entropies, minx, maxx)):
                args.skip_conservation = True

    elif args.sequence_mode is False:
        if args.skip_conservation:
            minx = None
            maxx = None
            entropies = None
            z_entropies = None
            (pdb_name, clean_input_file, largest_oligo_complexes,
             interfaces_dict, residue_index_mapping,
             tmdata) = analyse_protomer_results
        else:
            (pdb_name, clean_input_file, largest_oligo_complexes,
             interfaces_dict, entropies, z_entropies, residue_index_mapping,
             minx, maxx, tmdata) = analyse_protomer_results
            # Conservation analysis yielded nothing: behave as if skipped
            if all(value is None
                   for value in (entropies, z_entropies, minx, maxx)):
                args.skip_conservation = True

    report['runtime_arguments'][
        'skip_conservation'] = args.skip_conservation

    new_input_file = clean_input_file

    # Use information of complexes to build oligomers
    best_oligo_template, built_oligomers, report = make_oligomer(
        new_input_file,
        largest_oligo_complexes,
        report,
        args,
        residue_index_mapping=residue_index_mapping)

    # If no models were built, exit nicely.
    if built_oligomers is None:
        finalize(report, input_basename, start_time, start_timestamp, args)
        pctools.print_sorry()
        sys.exit(0)

    # Analyse built models
    reports = analyse_oligomers(new_input_file,
                                best_oligo_template,
                                built_oligomers,
                                interfaces_dict,
                                tmdata,
                                report,
                                args,
                                entropies=entropies,
                                z_entropies=z_entropies,
                                minx=minx,
                                maxx=maxx)
    finalize(reports, input_basename, start_time, start_timestamp, args)

Example #14

Show file

def finalize(reports, input_basename, start_time, start_timestamp, args):
    """Write the run summary, log the outcome and optionally zip the output.

    Args:
        reports: either a list of per-model report dicts (successful run; the
            one with the highest ``'protchoir_score'`` is summarised) or a
            single report dict (early exit; missing summary fields are filled
            with ``'NA'``).
        input_basename: prefix used for ``<prefix>_CHOIR_Summary.tsv`` and
            ``<prefix>_ProtCHOIR_OUT.zip``.
        start_time: ``datetime`` at which the run started, used to compute
            the total runtime.
        start_timestamp: POSIX timestamp of the run start; only entries of
            the working directory changed after it are archived.
        args: runtime options; ``zip_output`` and ``generate_report`` are
            read here.

    Raises:
        TypeError: if ``reports`` is neither a list nor a dict.
    """
    report_data = [
        'input_filename', 'sequence_mode', 'templatedmodel',
        'protomer_residues', 'tmspans', 'highest_scoring_state',
        'homo_oligomeric_over_other_score', 'best_template', 'best_nchains',
        'best_id', 'best_cov', 'best_qscore', 'model_oligomer_name',
        'model_molprobity', 'gesamt_rmsd', 'quality_score', 'surface_score',
        'interfaces_score', 'protchoir_score', 'total_runtime', 'exit'
    ]
    # Report entries whose files must survive the clean-up after zipping
    report_file_keys = (
        'html_report', 'molprobity_radar', 'comparison_plots',
        'protomer_figure', 'protomer_plot', 'template_figure',
        'topology_figure', 'assemblied_protomer_plot', 'input_filename'
    )
    # Progress-log message for each known non-zero exit code
    exit_messages = {
        '1': ": ERROR! Indicated template not found in oligomers database...",
        '2': ": ERROR! Failed to find suitable homologues...",
        '3': ": ERROR! Failed to find suitable homo-oligomeric interfaces...",
        '4': ": ERROR! No template had an average Q-score above cut-off...",
        '5': ": ERROR! Failed to find templates in local databases...",
        '6': ": ERROR! Sub-optimal alignment between template and target sequences...",
    }

    if isinstance(reports, list):
        # With zip_output == 2 nothing is protected from removal; otherwise
        # the models and the files needed for the report are kept on disk.
        keep_files = args.zip_output != 2
        nozip = []
        if keep_files:
            nozip = [
                os.path.basename(report['model_filename'])
                for report in reports
            ]
        for report in reports:
            if args.generate_report is not True:
                continue
            report['html_report'] = pctools.html_report(report, args)
            if not keep_files:
                continue
            for key, value in report.items():
                if key in report_file_keys:
                    nozip.append(os.path.basename(value))
                if key == 'model_figures':
                    nozip.extend(os.path.basename(figure) for figure in value)

        # sorted()[-1] (rather than max) keeps the last report among ties
        best_report = sorted(reports,
                             key=operator.itemgetter('protchoir_score'))[-1]

    elif isinstance(reports, dict):
        nozip = []
        best_report = reports
        for data in report_data:
            if data not in best_report:
                best_report[data] = 'NA'

    else:
        raise TypeError('reports must be a list of dicts or a dict, not ' +
                        type(reports).__name__)

    # Generate summary tsv file for the best report
    end_time = datetime.now()
    runtime = end_time - start_time
    # timedelta.seconds wraps around every 24 h; total_seconds() does not
    runtime_seconds = int(runtime.total_seconds())
    best_report['total_runtime'] = str(runtime_seconds)
    summary_file = input_basename + '_CHOIR_Summary.tsv'
    nozip.append(summary_file)

    if 'exit' not in best_report:
        best_report['exit'] = '0'
        progress_message = ": Finished running ProtCHOIR!"
    else:
        # Unknown codes (including the 'NA' placeholder) log nothing
        progress_message = exit_messages.get(best_report['exit'])
    if progress_message is not None:
        with open('CHOIR_Progress.out', 'a') as f:
            f.write(datetime.now().strftime("%H:%M:%S") + progress_message)

    with open(summary_file, 'w') as f:
        f.write(
            'Input\tSeq.Mode\tTemplated\tLength\tTMSpans\tLikelyState\tH3OScore\tTemplate\tChains\tIdentity\tCoverage\tAv.QScore\tBestModel\tMolprobity\tRMSD\tQuality\tSurface\tInterfaces\tProtCHOIR\tRuntime\tExit\n'
        )
        f.write('\t'.join([str(best_report[data])
                           for data in report_data]) + '\n')
    # Finalise
    final_end_time = datetime.timestamp(datetime.now())

    # Compress output
    if args.zip_output > 0:
        # Pause so the archive is created strictly after final_end_time
        time.sleep(1)
        try:
            import zlib  # noqa: F401 - only probing for deflate support
            compression = zipfile.ZIP_DEFLATED
        except (ImportError, AttributeError):
            compression = zipfile.ZIP_STORED

        archive_name = input_basename + '_ProtCHOIR_OUT.zip'
        with zipfile.ZipFile(archive_name, 'w',
                             compression=compression) as zipf:
            for f in os.listdir(os.getcwd()):
                if f == archive_name:
                    continue
                changed = os.path.getctime(f)
                if not (start_timestamp < changed < final_end_time):
                    continue
                print('Compressing... ' + f)
                # NOTE(review): ZipFile.write() on a directory stores only the
                # directory entry, yet the directory is removed below - confirm
                # that losing its contents is intended.
                zipf.write(f)
                if f not in nozip:
                    if os.path.isdir(f):
                        shutil.rmtree(f)
                    elif os.path.isfile(f):
                        os.remove(f)

    print('FINISHED AT: ' + datetime.now().strftime("%d-%m-%Y %H:%M"))
    print('TOTAL RUNTIME: ' + str(runtime_seconds) + ' s')

Example #15

Show file

def _count_terminal_gaps(seq):
    """Return the number of leading and of trailing '-' characters in seq."""
    leading = len(seq) - len(seq.lstrip('-'))
    trailing = len(seq) - len(seq.rstrip('-'))
    return leading, trailing


def generate_ali(alignments, best_oligo_template, residue_index_mapping, args):
    """Merge per-chain pairwise alignments into one Modeller ``.ali`` file.

    Each file in ``alignments`` is read as two-line FASTA records (a ``>``
    header followed by one sequence line). The record whose name matches the
    renamed-chains template identifies the chain; the terminal gaps of that
    template sequence are then imposed on every sequence of the chain.

    Relies on the module-level ``workdir``, ``input_name`` and ``verbosity``.

    Args:
        alignments: paths of the per-chain FASTA alignment files.
        best_oligo_template: template code used to build the file names.
        residue_index_mapping: mapping ``residue -> index`` or ``None``.
        args: runtime options; only ``args.symmetry`` is read here.

    Returns:
        Tuple ``(final_alignment, full_residue_mapping)``: the path of the
        written alignment and, per chain, either the residue mapping shifted
        by the number of leading gaps or (when ``residue_index_mapping`` is
        ``None``) just that number of leading gaps.

    Raises:
        ValueError: if an alignment file contains no template entry.
    """
    best_oligo_template_file = best_oligo_template + "_CHOIR_RenamedChainsTemplate"
    final_alignment = os.path.join(
        workdir,
        input_name + '_' + best_oligo_template + '_CHOIR_Alignment.ali')
    alignment_dict = {}
    full_residue_mapping = {}
    # Parse individual GESAMT alignments and organize in a per-chain dictionary
    for fasta_alignment in alignments:
        getseq = False
        template = False
        chain = None
        entryseq_dict = {}
        with open(fasta_alignment, 'r') as alignment_handle:
            for line in alignment_handle:
                # Only record sequence if line above starts with >
                if getseq is True:
                    getseq = False
                    seq = line.replace('\n', '')
                    # If this is the template, count leading and trailing gaps
                    if template is True:
                        template = False
                        leading_gaps, trailing_gaps = _count_terminal_gaps(seq)
                    assert seq is not None, 'Sequence is None'
                    assert seq != '', 'Sequence is empty'
                    entryseq_dict[entry] = seq.upper()
                    del seq
                # If it is an entry line, get details and expect sequence
                if line.startswith('>'):
                    entry = line.split('>')[1].split('(')[0].split(
                        '.pdb')[0].replace('\n', '')
                    # If entry is template, use chain as reference
                    if entry == best_oligo_template_file:
                        chain = line.split('(')[1].split(')')[0]
                        template = True
                    getseq = True

        # Without a template record neither the chain nor its gap counts are
        # known; fail clearly instead of reusing values of a previous file.
        if chain is None:
            raise ValueError('No entry for template ' +
                             best_oligo_template_file + ' found in ' +
                             str(fasta_alignment))

        # Remove leading and trailing gaps from the alignment for both template and query
        if trailing_gaps == 0:
            # A slice ending at -0 would be empty, hence the separate branch
            for entry, seq in entryseq_dict.items():
                entryseq_dict[entry] = leading_gaps * '-' + seq[leading_gaps:]
        else:
            for entry, seq in entryseq_dict.items():
                entryseq_dict[entry] = leading_gaps * '-' + seq[
                    leading_gaps:-trailing_gaps] + trailing_gaps * '-'
        if residue_index_mapping is not None:
            full_residue_mapping[chain] = collections.OrderedDict()
            for res, i in residue_index_mapping.items():
                full_residue_mapping[chain][res] = i + leading_gaps
        else:
            full_residue_mapping[chain] = leading_gaps

        alignment_dict[chain] = entryseq_dict
        pctools.printv(
            'Removed ' + clrs['c'] + str(leading_gaps) + clrs['n'] +
            ' leading gaps and ' + clrs['c'] + str(trailing_gaps) + clrs['n'] +
            ' trailing gaps from chain ' + clrs['c'] + chain + clrs['n'] +
            ' alignment.\n', verbosity)

    # If symmetry is desired, reduce all chains to match the size of the smallest
    if args.symmetry:
        max_leading_gaps = 0
        max_trailing_gaps = 0
        for chain, seqs in alignment_dict.items():
            for entry, seq in seqs.items():
                if entry == best_oligo_template_file:
                    leading_gaps, trailing_gaps = _count_terminal_gaps(seq)
                    max_leading_gaps = max(max_leading_gaps, leading_gaps)
                    max_trailing_gaps = max(max_trailing_gaps, trailing_gaps)
        pctools.printv(
            'To cope with symmetry restraints, the modelled sequence will contain '
            + clrs['c'] + str(max_leading_gaps) + clrs['n'] +
            ' leading gaps and ' + clrs['c'] + str(max_trailing_gaps) +
            clrs['n'] + ' trailing gaps' + clrs['n'] + '.\n', verbosity)
        print(max_trailing_gaps)
        # NOTE(review): `entry` is whatever name the loops above left behind
        # (the last record of the last chain), so only that one sequence per
        # chain is re-gapped here - confirm this is the intended entry.
        for chain, seqs in alignment_dict.items():
            if max_trailing_gaps == 0:
                seqs[entry] = max_leading_gaps * '-' + seqs[entry][
                    max_leading_gaps:]
            else:
                seqs[entry] = max_leading_gaps * '-' + seqs[
                    entry][max_leading_gaps:
                           -max_trailing_gaps] + max_trailing_gaps * '-'

    # Find out first and last chains
    ordered_chains = sorted(alignment_dict)
    first_chain = ordered_chains[0]
    last_chain = ordered_chains[-1]

    # Create strings to write in alignment file
    alignment_string_dict = {}
    for entry in [input_name, best_oligo_template_file]:
        if entry == input_name:
            alignment_string_dict[
                entry] = ">P1;" + input_name + "\nsequence:" + input_name + ":FIRST:" + first_chain + ":LAST:" + last_chain + "::::\n"
        elif entry == best_oligo_template_file:
            alignment_string_dict[
                entry] = ">P1;" + best_oligo_template_file + ".pdb\nstructureX:" + best_oligo_template_file + ".pdb:FIRST:" + first_chain + ":LAST:" + last_chain + "::::\n"
        # Chains are separated by '/', the last one is terminated by '*'
        for chain, entryseq in sorted(alignment_dict.items()):
            terminator = '*\n' if chain == last_chain else '/\n'
            alignment_string_dict[entry] += entryseq[entry] + terminator

    # Write alignment file
    with open(final_alignment, 'w') as f:
        for entry, entrystring in alignment_string_dict.items():
            pctools.printv(entrystring, verbosity)
            f.write(entrystring)

    print('Modeller Alignment written to ' + clrs['g'] +
          os.path.basename(final_alignment) + clrs['n'] + '\n')
    return final_alignment, full_residue_mapping

Example #16

Show file

def _pool_map(func, items):
    """Map func over items in a worker Pool, always releasing the pool."""
    pool = Pool()
    try:
        return pool.map_async(func, items).get()
    finally:
        pool.close()
        pool.join()


def make_oligomer(input_file,
                  largest_oligo_complexes,
                  report,
                  args,
                  residue_index_mapping=None):
    """Select a template, align the input to it and build oligomeric models.

    Runs section 2 of the protocol: [a] template selection (skipped in
    sequence mode, where candidates are tried in dictionary order),
    [b] per-chain and final alignment generation and scoring, and
    [c] model generation through Modeller.

    Several module-level globals (``workdir``, ``input_name``, ``verbosity``,
    ``g_input_file``, ``g_args``, ``best_oligo_template_code``,
    ``renamed_chains_file``) are (re)assigned here.

    Args:
        input_file: path of the cleaned input structure or sequence file.
        largest_oligo_complexes: mapping ``template hit -> template chains``.
        report: report dict; updated in place and returned.
        args: runtime options.
        residue_index_mapping: optional residue mapping forwarded to
            ``generate_ali``.

    Returns:
        ``(best_oligo_template, built_oligomers, report)`` on success, or
        ``(None, None, report)`` with ``report['exit']`` set to ``'4'`` (no
        template above the Q-score cut-off), ``'5'`` (no usable template in
        the local databases) or ``'6'`` (unacceptable alignment).
    """
    global workdir
    global input_name
    global verbosity
    global g_input_file
    global g_args
    global best_oligo_template_code
    global renamed_chains_file
    g_input_file = input_file
    g_args = args
    verbosity = args.verbosity
    workdir = os.getcwd()

    # Subsection 2[a] #######################################################################
    if args.sequence_mode is False:
        input_name = os.path.basename(input_file).split(".pdb")[0].replace(
            '.', '_')
        candidate_qscores = {}
        # Select structurally best oligomeric template using GESAMT
        pctools.print_section(2, 'OLIGOMER ASSEMBLING')
        pctools.print_subsection('2[a]', 'Structural template selection')
        if args.multiprocess is True:
            for hitchain, average_qscore, output in _pool_map(
                    analyse_largest_complexes,
                    largest_oligo_complexes.items()):
                candidate_qscores[hitchain] = average_qscore
                report['hits'][hitchain]['qscore'] = round(average_qscore, 3)
                print(output)
        else:
            for item in largest_oligo_complexes.items():
                hitchain, average_qscore, output = analyse_largest_complexes(
                    item)
                candidate_qscores[hitchain] = average_qscore
                report['hits'][hitchain]['qscore'] = round(average_qscore, 3)
                print(output)

        best_oligo_template = max(candidate_qscores,
                                  key=candidate_qscores.get)
        if candidate_qscores[best_oligo_template] < args.qscore_cutoff:
            print('No template had an average Q-score above cut-off of ' +
                  clrs['c'] + str(args.qscore_cutoff) + clrs['n'] +
                  '\nTry lowering the cutoff or running in sequence mode.\n')
            report['exit'] = '4'
            return None, None, report

        print('Structurally, the best template is: ' + clrs['y'] +
              best_oligo_template + clrs['n'] + '. Using that!\n')
        best_hit = report['hits'][best_oligo_template]
        report['best_template'] = best_oligo_template.split(':')[0]
        report['best_id'] = best_hit['id']
        report['best_cov'] = best_hit['coverage']
        report['best_qscore'] = best_hit['qscore']
        report['best_nchains'] = best_hit['final_homo_chains']
        report['topology_figure'] = './' + best_oligo_template.replace(
            ':', '_') + '_CHOIR_Topology.png'
        template_chains = largest_oligo_complexes[best_oligo_template]
        best_oligo_template_code = best_oligo_template.split(':')[0]
        clean_template_file = make_local_template(best_oligo_template_code)

    elif args.sequence_mode is True:
        if input_file.endswith('.pdb'):
            input_name = os.path.basename(input_file).split(".pdb")[0].replace(
                '.', '_')
            input_file = os.path.join(
                workdir, input_name + '_CHOIR_MonomerSequence.fasta')
            g_input_file = input_file

        elif input_file.endswith('_CHOIR_MonomerSequence.fasta'):
            input_name = os.path.basename(input_file).split(
                "_CHOIR_MonomerSequence.fasta")[0]

        pctools.print_section(2, 'OLIGOMER ASSEMBLING - SEQUENCE MODE')
        print(clrs['y'] +
              "Skipping section 2[a] - Structural template selection" +
              clrs['n'] + "\n")
        # Try the candidates in order until one can be prepared locally
        candidates = list(largest_oligo_complexes)
        for attempt, best_oligo_template in enumerate(candidates, start=1):
            try:
                candidate_hit = report['hits'][best_oligo_template]
                report['best_template'] = best_oligo_template.split(':')[0]
                report['best_id'] = candidate_hit['id']
                report['best_cov'] = candidate_hit['coverage']
                report['best_qscore'] = 'NA'
                report['best_nchains'] = candidate_hit['final_homo_chains']
                report['topology_figure'] = './' + best_oligo_template.replace(
                    ':', '_') + '_CHOIR_Topology.png'
                template_chains = largest_oligo_complexes[best_oligo_template]
                best_oligo_template_code = best_oligo_template.split(':')[0]
                clean_template_file = make_local_template(
                    best_oligo_template_code)
                break
            except Exception:
                # Deliberately best-effort, but KeyboardInterrupt/SystemExit
                # are no longer swallowed as with a bare except.
                if attempt < len(candidates):
                    print('Attempt ' + str(attempt) +
                          ' failed, trying a differente template candidate.')
        else:
            # Every candidate failed (or there was none to try)
            print('Failed to find templates in local databases.')
            report['exit'] = '5'
            return None, None, report

    relevant_chains_file = extract_relevant_chains(clean_template_file,
                                                   template_chains)
    if args.generate_report is True:
        report['template_figure'], pymol_output = pctools.pymol_screenshot(
            relevant_chains_file, args)
        print(pymol_output)
    renamed_chains_file, chains_dict = rename_relevant_chains(
        relevant_chains_file)
    relevant_chains = [
        chains_dict[template_chain] for template_chain in template_chains
    ]

    # Subsection 2[b] #######################################################################
    pctools.print_subsection('2[b]', 'Generating alignment')
    # Generate per chain alignment files
    alignment_files = []
    if args.sequence_mode is False:
        if args.multiprocess is True:
            for qscore, rmsd, fasta_out, gesamt_output in _pool_map(
                    run_gesamt_parallel, chains_dict.values()):
                alignment_files.append(fasta_out)
                print(gesamt_output)
        else:
            for chain in chains_dict.values():
                qscore, rmsd, fasta_out, gesamt_output = run_gesamt_parallel(
                    chain)
                alignment_files.append(fasta_out)
                print(gesamt_output)

    elif args.sequence_mode is True:
        if args.multiprocess is True:
            # The pool is now closed and joined like in the other branches
            for fasta_out, output in _pool_map(alignment_from_sequence,
                                               chains_dict.values()):
                alignment_files.append(fasta_out)
                print(output)
        else:
            for current_chain in chains_dict.values():
                fasta_out, output = alignment_from_sequence(current_chain)
                alignment_files.append(fasta_out)
                print(output)
    print('Alignment files:\n' + clrs['g'] +
          ('\n').join([os.path.basename(i)
                       for i in alignment_files]) + clrs['n'])

    # Generate final alignment which will be the input for Modeller
    final_alignment, full_residue_mapping = generate_ali(
        alignment_files, best_oligo_template_code, residue_index_mapping, args)
    # Score said alignment and enforce treshold
    report[
        'relative_alignment_score'], relative_wscores, nchains = score_alignment(
            final_alignment)
    print('\nFinal average relative score for alignment: ' +
          str(round(report['relative_alignment_score'], 2)) + '%')
    bad_streches = sum(1 for wscore in relative_wscores
                       if wscore < args.similarity_cutoff)
    if bad_streches >= args.bad_streches * nchains:
        if args.sequence_mode is True:
            print(
                '\nThe alignment score was unacceptable for ' + clrs['r'] +
                str(bad_streches) + clrs['n'] +
                ' 30-res segments of the protein complex.\nTry running the default (structure) mode.\n'
            )
        else:
            print(
                '\nThe alignment score was unacceptable for ' + clrs['r'] +
                str(bad_streches) + clrs['n'] +
                ' 30-res segments of the protein complex.\nTry increasing the number of candidate templates or tweaking the similarity cut-offs.\n'
            )
        report['exit'] = '6'
        return None, None, report

    # Subsection 2[c] #######################################################################
    pctools.print_subsection('2[c]', 'Generating models')
    genmodel_file, expected_models = create_genmodel(final_alignment,
                                                     best_oligo_template_code,
                                                     relevant_chains, args)
    run_modeller(genmodel_file)

    # Record list of oligomers built
    built_oligomers = []
    for model in expected_models:
        built_oligomers.append(
            restore_chain_identifiers(model, chains_dict,
                                      full_residue_mapping))
    print(clrs['b'] + 'ProtCHOIR' + clrs['n'] + ' built ' + clrs['c'] +
          str(len(built_oligomers)) + clrs['n'] + ' model oligomers:')
    for model in built_oligomers:
        print(clrs['g'] + model + clrs['n'])

    return best_oligo_template, built_oligomers, report

Example #17

Show file

def _percent_identity(seq_a, seq_b):
    '''
    Aligns two sequences with parasail (semi-global, BLOSUM62, gap open 10,
    gap extend 1) and returns the percentage of matches over the alignment
    length, or 0 when the alignment is empty.
    Called by: collect_fasta()
    '''
    alignment = parasail.sg_stats_striped_16(seq_a, seq_b, 10, 1,
                                             parasail.blosum62)
    if alignment.length == 0:
        return 0
    return (alignment.matches) / alignment.length * 100


def collect_fasta(verbosity):
    '''
    Fetches fasta files in the pdb_homo_archive and creates collected fasta
    files within a "sequences" folder (one each for the largepdb, mono, hetero
    and homo sub-archives). For that, it checks the identity among the chains
    in each original fasta and only keeps track of the unique chains, i.e.
    chains with at most 99% identity to every chain already kept. The collected
    files are then used to make the blast databases.
    Called by: update_databases()
    '''
    fastafiles = [
        os.path.join(dp, f) for dp, dn, filenames in os.walk(pdb_homo_archive)
        for f in filenames if f.endswith(".fasta")
    ]
    seqdir = os.path.join(pdb_homo_archive, 'sequences')
    if not os.path.isdir(seqdir):
        os.mkdir(seqdir)

    # (path fragment identifying the sub-archive, collected fasta file, blast
    # database name). The order matters: it is both the precedence used when
    # routing a fasta file and the order in which databases are built.
    collections = [
        ('/largepdb_sequences/',
         os.path.join(seqdir, 'largepdb_collected.fastas'), 'largedb'),
        ('/mono_sequences/',
         os.path.join(seqdir, 'mono_collected.fastas'), 'monodb'),
        ('/hetero_sequences/',
         os.path.join(seqdir, 'hetero_collected.fastas'), 'heterodb'),
        ('/homo_sequences/',
         os.path.join(seqdir, 'homo_collected.fastas'), 'homodb'),
    ]

    # Start every collected file empty, since entries are appended below
    for _, collected_fasta, _ in collections:
        with open(collected_fasta, 'w+'):
            pass

    for fasta in pg(fastafiles, widgets=widgets):
        pctools.printv('Assessing ' + clrs['y'] + fasta + clrs['n'] + '...',
                       verbosity)
        # Read through a context manager so the handle is closed right away
        # instead of leaking one descriptor per fasta file
        with open(fasta, 'r') as fasta_handle:
            contents = fasta_handle.read()
        nchains = str(contents.count('>'))
        pctools.printv(
            'With ' + clrs['y'] + nchains + clrs['n'] +
            ' chains to be assessed\n', verbosity)

        uniques = []
        for entry in contents.split('>'):
            if not entry:
                continue
            splitentry = entry.split('\n', 1)
            pdbch = splitentry[0]
            seq = splitentry[1].replace('\n', '')
            # The generator lets all() stop at the first chain that is more
            # than 99% identical; an empty uniques list trivially passes
            if all(
                    _percent_identity(seq, kept_seq) <= 99
                    for _, kept_seq in uniques):
                uniques.append([pdbch, seq])

        # Append the unique chains to the collected file of the first
        # sub-archive whose path fragment occurs in this fasta path
        for path_fragment, collected_fasta, _ in collections:
            if path_fragment in fasta:
                with open(collected_fasta, 'a') as f:
                    for pdbch, seq in uniques:
                        wrapped_seq = "\n".join(tw.wrap(seq))
                        f.write('>' + pdbch + '\n' + wrapped_seq + '\n\n')
                break

    for _, collected_fasta, dbname in collections:
        subprocess.run([
            makeblastdb_exe, '-in', collected_fasta, '-dbtype', 'prot',
            '-out',
            os.path.join(seqdir, dbname)
        ])

Example #18

Show file

def score_alignment(alignment_file):
    '''
    Scores the alignment stored in a PIR file whose first record is the query
    and whose second record is the template, with chains separated by "/".
    Each chain pair is trimmed by the leading and trailing gaps of the query
    chain, then the query-vs-template score is expressed as a percentage of
    the template-vs-template (maximum) score, both for the whole chain and for
    each segment score returned by score_pairwise.
    Returns the average relative score over chains, the list of relative
    segment scores and the number of query chains.
    '''
    print(clrs['b'] + 'SCORING ALIGNMENT' + clrs['n'] + ' in ' + clrs['y'] +
          os.path.basename(alignment_file) + clrs['n'] + '\n')
    records = list(SeqIO.parse(alignment_file, "pir"))
    query_chains = str(records[0].seq).split('/')
    template_chains = str(records[1].seq).split('/')

    # Trim both chains by the terminal gap counts measured on the query
    trimmed_pairs = []
    for query_chain, template_chain in zip(query_chains, template_chains):
        leading_gaps = len(query_chain) - len(query_chain.lstrip('-'))
        trailing_gaps = len(query_chain) - len(query_chain.rstrip('-'))
        # A zero trailing count must slice to the end, not to index -0
        stop = -trailing_gaps or None
        trimmed_pairs.append((query_chain[leading_gaps:stop],
                              template_chain[leading_gaps:stop]))

    relative_scores = []
    relative_wscores = []
    for trimmed_query, trimmed_template in trimmed_pairs:
        pctools.printv(
            '\nCalculating ' + clrs['y'] + 'maximum scores' + clrs['n'] +
            ' for chain segments:', g_args.verbosity)
        best_score, best_wscores = score_pairwise(trimmed_template,
                                                  trimmed_template,
                                                  MatrixInfo.blosum62, 0, 0)
        pctools.printv(
            '\nCalculating ' + clrs['y'] + 'actual scores' + clrs['n'] +
            ' for chain segments:', g_args.verbosity)
        actual_score, actual_wscores = score_pairwise(trimmed_query,
                                                      trimmed_template,
                                                      MatrixInfo.blosum62, 0,
                                                      0)
        relative_scores.append(round(actual_score * 100 / best_score, 2))
        # Segments whose maximum score is zero count as a perfect match
        relative_wscores.extend(
            round(actual * 100 / best, 2) if best != 0 else 100
            for best, actual in zip(best_wscores, actual_wscores))

    relative_score = sum(relative_scores) / len(relative_scores)

    # Green when above the similarity cut-off, red otherwise
    colored_wscores = [
        clrs['g' if value > g_args.similarity_cutoff else 'r'] + str(value) +
        clrs['n'] for value in relative_wscores
    ]
    print('\nRelative score per 30-res segment: ' +
          ' ~ '.join(colored_wscores) + clrs['n'])
    return relative_score, relative_wscores, len(query_chains)

Example #19

Show file

def _relative_count(model_count, template_count, delta):
    '''
    Scores one countable interface feature (hydrogen bonds, salt bridges or
    disulphide bridges) of the model against the template.
    Returns a tuple (relative percentage, clrs key used to print the delta,
    counted). "counted" is False when neither structure has the feature, in
    which case the caller removes the feature weight from the score
    denominator.
    Called by: analyse_model()
    '''
    if model_count == template_count == 0:
        return 0, 'r', False
    if delta >= 0:
        return 100, 'g', True
    return round(model_count * 100 / template_count, 2), 'r', True


def analyse_model(oligomer):
    '''
    Analyses one model oligomer file and builds its report dictionary, starting
    from a copy of g_report. Depending on the letters in g_args.assessment it
    runs the interfaces ('I', skipped when g_args.allow_monomers is set),
    GESAMT ('G') and Molprobity ('M') comparisons, fills the corresponding
    report fields ('NA' for skipped assessments), derives the final
    protchoir_score and its colour, and pickles the report to
    <model name>_CHOIR_model_report.pickle in the current directory.
    Returns the report dictionary and the collected text output joined by
    newlines.
    '''
    # NOTE(review): template, template_file and template_molprobity are not
    # defined in this function; presumably module-level globals - confirm.
    output = []
    model_report = g_report.copy()
    model_report['model_filename'] = oligomer
    model_oligomer_name = os.path.basename(oligomer).split(
        "_CHOIR_")[0].replace('.', '_')
    output.append(pctools.subsection('3', model_oligomer_name))
    output.append('Analysing oligomer file: ' + clrs['y'] + oligomer +
                  clrs['n'] + '\n')
    model_report['model_oligomer_name'] = model_oligomer_name
    if g_args.generate_report is True:
        model_report['model_figures'], pymol_output = pctools.pymol_screenshot(
            oligomer, g_args, putty=True)
        output.append(pymol_output)
    pdb_name, structure, _ = pctools.parse_any_structure(oligomer)
    _, seqs, _ = pctools.extract_seqs(structure, 0)
    relevant_chains = [seq[0] for seq in seqs]

    _, _, protomer_data = pctools.run_pisa(
        oligomer,
        '',
        g_args.verbosity,
        gen_monomer_data=True,
        gen_oligomer_data=True)
    protomer_surface_residues = pctools.get_areas(protomer_data)
    (model_report['assemblied_protomer_plot'],
     model_report['assemblied_protomer_exposed_area'],
     model_report['assemblied_protomer_hydrophobic_area'],
     model_report['assemblied_protomer_conserved_area'], _, _,
     analysis_output) = pctools.plot_analysis(
         pdb_name,
         protomer_surface_residues,
         g_entropies,
         g_z_entropies,
         g_tmdata,
         g_args,
         minx=g_minx,
         maxx=g_maxx)
    output.append(analysis_output)

    assess_interfaces = ('I' in g_args.assessment
                         and not g_args.allow_monomers)
    # Only meaningful (and only read) when interfaces are assessed
    use_surface = False

    if assess_interfaces:
        output.append(
            pctools.subsection('3' + '[I]', 'Interfaces Comparison: ' +
                               model_oligomer_name))
        use_surface = (g_args.sequence_mode is False
                       and g_args.skip_conservation is False)
        if use_surface:
            # Percent change of each protomer area upon assembly
            for area in ('exposed', 'hydrophobic', 'conserved'):
                assembled = float(
                    model_report['assemblied_protomer_' + area + '_area'])
                isolated = float(model_report['protomer_' + area + '_area'])
                model_report[area + '_area_reduction'] = int(
                    100 * (assembled - isolated) / isolated)

            exposed_reduction = model_report['exposed_area_reduction']
            if exposed_reduction < -5:
                if model_report['hydrophobic_area_reduction'] < 0:
                    hydrophobic_surface_score = 10 * (
                        model_report['hydrophobic_area_reduction'] /
                        exposed_reduction) / 3
                else:
                    hydrophobic_surface_score = 0
                hydrophobic_surface_score = min(hydrophobic_surface_score, 10)
                output.append('Hydrophobic surface score: ' +
                              str(hydrophobic_surface_score))
                if model_report['conserved_area_reduction'] < 0:
                    conserved_surface_score = 10 * (
                        model_report['conserved_area_reduction'] /
                        exposed_reduction) / 3
                else:
                    conserved_surface_score = 0
                conserved_surface_score = min(conserved_surface_score, 10)
                output.append('Conserved surface score: ' +
                              str(conserved_surface_score))
                model_report['surface_score'] = round(
                    (hydrophobic_surface_score + conserved_surface_score) / 2,
                    2)
            else:
                output.append(clrs['r'] + 'Exposed area reduction too small.' +
                              clrs['n'])
                model_report['surface_score'] = 0
            output.append('Final surface score: ' +
                          str(model_report['surface_score']))
        else:
            model_report['surface_score'] = 'NA'

        model_oligomer = oligomer.split('_CHOIR_CorrectedChains')[0]
        xml_out = model_oligomer + '_CHOIR_PisaInterfaces.xml'
        model_interfaces_list, interfaces_output = pctools.parse_interfaces(
            xml_out, relevant_chains, g_args.verbosity)
        template_interfaces_list = g_interfaces_dict[g_template_hitchain]

        if model_interfaces_list and template_interfaces_list:
            if g_args.verbosity > 0:
                output.append(clrs['y'] + 'MODEL INTERFACES' + clrs['n'])
                for model_interface in model_interfaces_list:
                    output.append(clrs['y'] +
                                  ' <> '.join(model_interface['chains']) +
                                  clrs['n'])
                    output.append(clrs['y'] + 'Interface Area: ' + clrs['n'] +
                                  str(model_interface['interface area']) +
                                  ' A^2')
                    output.append(
                        clrs['y'] + 'Interface Solvation Energy: ' +
                        clrs['n'] +
                        str(model_interface['interface solvation energy']) +
                        ' kcal/mol')
                    output.append(clrs['y'] + 'Hydrogen Bonds: ' + clrs['n'] +
                                  str(model_interface['hydrogen bonds']))
                    output.append(clrs['y'] + 'Salt Bridges: ' + clrs['n'] +
                                  str(model_interface['salt bridges']))
                    output.append(clrs['y'] + 'Disulphide Bridges: ' +
                                  clrs['n'] +
                                  str(model_interface['disulphide bridges']) +
                                  "\n\n")

            # (interface key, report tag, output label, score weight, line end)
            count_features = (
                ('hydrogen bonds', 'hb', 'Delta Hydrogen Bonds: ', 2, '%)'),
                ('salt bridges', 'sb', 'Delta Salt Bridges: ', 3, '%)'),
                ('disulphide bridges', 'ss', 'Delta Disulphide Bridges: ', 4,
                 '%)\n'),
            )

            interfaces_comparison = {}
            for template_interface in template_interfaces_list:
                for model_interface in model_interfaces_list:
                    # Only interfaces formed by the same chains are compared
                    if set(model_interface['chains']) != set(
                            template_interface['chains']):
                        continue
                    comparison_data = {}
                    # Sum of weights: area 1, energy 2, hb 2, sb 3, ss 4
                    denominator = 12

                    output.append(clrs['y'] + 'INTERFACES COMPARISON' +
                                  clrs['n'])
                    output.append(' <> '.join(model_interface['chains']))

                    # Interface area: larger or equal is as good as it gets
                    model_area = model_interface['interface area']
                    template_area = template_interface['interface area']
                    delta_area = round(model_area - template_area, 2)
                    comparison_data['model area'] = model_area
                    comparison_data['template area'] = template_area
                    comparison_data['delta area'] = delta_area
                    if delta_area >= 0:
                        emphasis_color = clrs['g']
                        relative_area = 100
                    else:
                        emphasis_color = clrs['r']
                        relative_area = round(
                            model_area * 100 / template_area, 2)
                    output.append('Delta Interface Area: ' + emphasis_color +
                                  str(delta_area) + clrs['n'] + ' A^2 (' +
                                  str(relative_area) + '%)')

                    # Solvation energy: lower or equal is as good as it gets
                    model_energy = model_interface[
                        'interface solvation energy']
                    template_energy = template_interface[
                        'interface solvation energy']
                    delta_energy = round(model_energy - template_energy, 2)
                    comparison_data['model energy'] = model_energy
                    comparison_data['template energy'] = template_energy
                    comparison_data['delta energy'] = delta_energy
                    if delta_energy <= 0:
                        emphasis_color = clrs['g']
                        relative_energy = 100
                    else:
                        emphasis_color = clrs['r']
                        if model_energy < 0 and template_energy < 0:
                            relative_energy = round(
                                model_energy * 100 / template_energy, 2)
                        else:
                            # The model energy is higher than the template's
                            # and at least one of them is not favourable
                            # (>= 0). This also covers an energy of exactly
                            # zero, which previously left relative_energy
                            # unset.
                            relative_energy = 0
                    output.append('Delta Interface Solvation Energy: ' +
                                  emphasis_color + str(delta_energy) +
                                  clrs['n'] + ' kcal/mol (' +
                                  str(relative_energy) + '%)')

                    weighted_sum = relative_area + 2 * relative_energy
                    for key, tag, label, weight, line_end in count_features:
                        model_count = model_interface[key]
                        template_count = template_interface[key]
                        delta_count = round(model_count - template_count, 2)
                        comparison_data['model ' + tag] = model_count
                        comparison_data['template ' + tag] = template_count
                        comparison_data['delta ' + tag] = delta_count
                        relative_count, color, counted = _relative_count(
                            model_count, template_count, delta_count)
                        if not counted:
                            # Feature absent on both sides: do not let it
                            # weigh on the interface score
                            denominator -= weight
                        weighted_sum += weight * relative_count
                        output.append(label + clrs[color] + str(delta_count) +
                                      clrs['n'] + ' (' + str(relative_count) +
                                      line_end)

                    if denominator == 0:
                        comparison_data['score'] = 0
                    else:
                        comparison_data['score'] = round(
                            weighted_sum / denominator, 2)
                    output.append('Interface score: ' +
                                  str(comparison_data['score']))
                    interfaces_comparison[''.join(
                        sorted(model_interface['chains']))] = comparison_data

            comparison_plots, interfaces_output = plot_deltas(
                model_oligomer_name, template, interfaces_comparison, g_args)
            model_report['comparison_plots'] = os.path.basename(
                comparison_plots)
            output.append(interfaces_output)
            summed_score = sum(
                data['score'] for data in interfaces_comparison.values())

            if interfaces_comparison:
                model_report['interfaces_score'] = round(
                    summed_score / (10 * len(interfaces_comparison)), 2)
            else:
                # No model interface shares its chains with a template
                # interface; score zero instead of dividing by zero
                model_report['interfaces_score'] = 0
            output.append('Final interfaces score: ' +
                          str(model_report['interfaces_score']))
        else:
            if 'surface_score' not in model_report:
                model_report['surface_score'] = 0
            model_report['interfaces_score'] = 0

    else:
        model_report['surface_score'] = 'NA'
        model_report['interfaces_score'] = 'NA'
        model_report['comparison_plots'] = 'NA'
        model_report['assemblied_protomer_exposed_area'] = 'NA'
        model_report['assemblied_protomer_hydrophobic_area'] = 'NA'
        model_report['assemblied_protomer_conserved_area'] = 'NA'

    if 'G' in g_args.assessment:
        output.append(pctools.subsection('3' + '[G]', 'GESAMT Comparison'))
        qscore, rmsd, fasta_out, gesamt_output = pctools.run_gesamt(
            template, template_file, model_oligomer_name, oligomer, None,
            g_args)
        output.append(gesamt_output)
        model_report['gesamt_qscore'] = str(qscore)
        model_report['gesamt_rmsd'] = str(rmsd)
    else:
        model_report['gesamt_qscore'] = 'NA'
        model_report['gesamt_rmsd'] = 'NA'

    assess_quality = 'M' in g_args.assessment
    if assess_quality:
        output.append(pctools.subsection('3' + '[M]', 'Molprobity Comparison'))
        model_molprobity, molprobity_output = pctools.run_molprobity(
            oligomer, g_args)
        output.append(molprobity_output)
        model_report['model_clashscore'] = str(model_molprobity['clashscore'])
        model_report['model_molprobity'] = str(
            model_molprobity['molprobity_score'])
        output.append(clrs['y'] + 'MOLPROBITY COMPARISON' + clrs['n'])
        output.append('Criterion\tTempl.\tModel')
        for label, criterion in (('Rama. Fav.\t', 'rama_fav'),
                                 ('Rama. Out.\t', 'rama_out'),
                                 ('Rot. Out.\t', 'rot_out'),
                                 ('CBeta Dev.\t', 'cb_dev'),
                                 ('Clashscore\t', 'clashscore'),
                                 ('Molprob. Score\t', 'molprobity_score')):
            output.append(label + str(template_molprobity[criterion]) + '\t' +
                          str(model_molprobity[criterion]))
        molprobity_radar, radar_output = plot_molprobity(
            model_oligomer_name, model_molprobity, template,
            template_molprobity)
        output.append(radar_output)
        model_report['molprobity_radar'] = molprobity_radar
        delta_clashscore = (model_molprobity['clashscore'] -
                            template_molprobity['clashscore']) / 10
        output.append('Delta clashscore: ' + str(delta_clashscore))
        if delta_clashscore >= 1:
            model_report['quality_score'] = round(
                10 - math.log(delta_clashscore**5, 10), 2)
        else:
            model_report['quality_score'] = 10
        output.append('Final quality score: ' +
                      str(model_report['quality_score']))
    else:
        model_report['model_clashscore'] = 'NA'
        model_report['model_molprobity'] = 'NA'
        model_report['quality_score'] = 'NA'

    # Final score: mean of whichever partial scores were actually computed
    if assess_quality and assess_interfaces:
        if use_surface:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score'],
                    model_report['quality_score']
                ]) / 3, 2)
        else:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['quality_score']
                ]) / 2, 2)
    elif assess_quality:
        model_report['protchoir_score'] = model_report['quality_score']
    elif assess_interfaces:
        # Requiring assess_interfaces (not just 'I') keeps the 'NA'
        # placeholders set for allow_monomers runs out of the arithmetic
        if use_surface:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score']
                ]) / 2, 2)
        else:
            model_report['protchoir_score'] = model_report['interfaces_score']
    else:
        model_report['protchoir_score'] = 'NA'

    if str(model_report['protchoir_score']) == 'NA':
        model_report['score_color'] = 'grey'
    elif model_report['protchoir_score'] <= 5:
        model_report['score_color'] = 'red'
    elif 5 < model_report['protchoir_score'] <= 7:
        model_report['score_color'] = 'orange'
    elif model_report['protchoir_score'] > 7:
        model_report['score_color'] = 'green'

    # Context manager guarantees the pickle file is flushed and closed
    with open(model_oligomer_name + '_CHOIR_model_report.pickle',
              'wb') as report_file:
        pickle.dump(model_report, report_file)

    return model_report, '\n'.join(output)