Python Toolbox.extract_seqs Exemples

Langage de programmation: Python

Espace de noms/Paquet : ProtCHOIR

Class/Type: Toolbox

Méthode/Fonction: extract_seqs

Exemples sur hotexamples.com : 2

Python Toolbox.extract_seqs - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de ProtCHOIR.Toolbox.extract_seqs extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher / Cacher

printv(8)

parse_any_structure(5)

print_section(3)

parse_pdb_contents(2)

extract_seqs(2)

run_molprobity(2)

run_gesamt(2)

pymol_screenshot(2)

FileFormatError(1)

split_states(1)

run_pisa(1)

print_subsection(1)

print_sorry(1)

plot_analysis(1)

parse_interfaces(1)

parse_pdb_structure(1)

SelectChains(1)

is_valid_sequence(1)

is_nmr(1)

html_report(1)

gzip_pdb(1)

get_pairwise_ids(1)

get_areas(1)

get_annotated_states(1)

check_interfaces(1)

SelectIfCA(1)

subsection(1)

Méthodes fréquemment utilisées

printv (8)

parse_any_structure (5)

print_section (3)

parse_pdb_contents (2)

extract_seqs (2)

run_molprobity (2)

run_gesamt (2)

pymol_screenshot (2)

FileFormatError (1)

split_states (1)

Méthodes fréquemment utilisées

run_pisa (1)

print_subsection (1)

print_sorry (1)

plot_analysis (1)

parse_interfaces (1)

parse_pdb_structure (1)

SelectChains (1)

is_valid_sequence (1)

is_nmr (1)

html_report (1)

gzip_pdb (1)

get_pairwise_ids (1)

get_areas (1)

get_annotated_states (1)

check_interfaces (1)

SelectIfCA (1)

subsection (1)

Méthodes fréquemment utilisées

gzip_pdb (1)

get_pairwise_ids (1)

get_areas (1)

get_annotated_states (1)

check_interfaces (1)

SelectIfCA (1)

subsection (1)

Exemple #1

0

Afficher le fichier

def curate_homoDB(verbosity):
    '''
    Creates homo-oligomeric database from a local pdb repository.
    The divided scheme adopted by RCSB, in which the subdirectories
    are the two middle characters in the PDB code, is assumed.

    Every run writes three time-stamped bookkeeping files into the
    "stats" folder of the homo-oligomer archive:
    * <timestamp>-choirdb.dat lists every assessed file with the time of
      its assessment (entries of the previous run that were not
      re-assessed are carried over).
    * <timestamp>-choirdb.log contains summarized relevant information
      about each entry stored in the database.
    * <timestamp>-choirdb.err lists the files whose assessment raised an
      error (or a "no errors" notice).
    Sequences are recorded through record_fasta().

    Parameters:
        verbosity: verbosity level forwarded to pctools.printv() and
                   list_new_files().

    Called by: update_databases()
    '''
    # Limits above which a biounit file is not fully analysed; only its
    # sequences are recorded.
    max_file_size_mb = 2
    max_chains = 60

    # Create stats folder if does not exist
    stats_dir = os.path.join(pdb_homo_archive, 'stats')
    if not os.path.isdir(stats_dir):
        os.mkdir(stats_dir)

    # Compare latest assession with new files
    assession_log = read_latest_assession(stats_dir)
    new_files = list_new_files(pdb1_archive, assession_log, verbosity)
    print(clrs['g'] + str(len(new_files)) + clrs['n'] +
          ' new structure files were found and will be processed')
    now = str(time.strftime("%d-%m-%Y@%H.%M.%S"))
    dat_file = os.path.join(stats_dir, now + '-choirdb.dat')
    log_file = os.path.join(stats_dir, now + '-choirdb.log')
    err_file = os.path.join(stats_dir, now + '-choirdb.err')

    # Write files not to be updated to new dat file (append mode also
    # creates the file when it does not exist yet)
    with open(dat_file, 'a') as f:
        for i in assession_log:
            if i not in new_files:
                f.write(i + " " + assession_log[i] + "\n")

    # Create log file
    if not os.path.isfile(log_file):
        with open(log_file, 'w+') as f:
            f.write('Code, Chains, Author, Software, Date\n')

    # Read Chain correspondences
    # NOTE(review): pickle.load must only ever see files written by this
    # function; never point it at untrusted data.
    chain_correspondences_file = os.path.join(
        stats_dir, 'chain_correspondences.pickle')
    if os.path.isfile(chain_correspondences_file):
        with open(chain_correspondences_file, 'rb') as p:
            chain_correspondences = pickle.load(p)
    else:
        chain_correspondences = {}

    # Main loop that will populate the ProtCHOIR database
    for pdb in pg(new_files, widgets=widgets):
        filename = pdb.split('/')[-1]
        subfolder = pdb.split('/')[-2]

        # Record assessment in dat file
        with open(dat_file, 'a') as f:
            f.write(filename + " " + str(time.time()) + '\n')

        # Start assession
        pctools.printv('\nAssessing ' + pdb + '...', verbosity)

        # Reject files larger than max_file_size_mb (size in MiB)
        file_size = os.stat(pdb).st_size / 1048576
        pctools.printv(
            'File size: ' + clrs['c'] + '{0:.1g}'.format(file_size) + ' Mb' +
            clrs['n'], verbosity)
        if file_size > max_file_size_mb:
            pctools.printv(
                clrs['r'] + "File size too large!" + clrs['n'], verbosity)
            pctools.printv(
                clrs['y'] +
                "Will try to fetch sequences from asymmetric unit." +
                clrs['n'], verbosity)
            alternative_pdb = os.path.join(
                pdb_archive, subfolder,
                'pdb' + filename.split('.')[0] + '.ent.gz')
            _record_large_pdb_sequences(
                alternative_pdb, subfolder, chain_correspondences, verbosity)
            continue

        try:
            pdb_code, structure, nchains = pctools.parse_pdb_structure(pdb)
            pctools.printv(
                'Number of chains in structure ' + clrs['y'] + pdb_code +
                clrs['n'] + ': ' + str(nchains), verbosity)

            # Reject structures with more than max_chains chains
            if int(nchains) > max_chains:
                pctools.printv(
                    "Number of chains (" + clrs['y'] + str(nchains) +
                    clrs['n'] + ") larger than 60! " + clrs['r'] +
                    "Too many chains!" + clrs['n'], verbosity)
                pctools.printv(
                    clrs['y'] + "Will try to fetch sequences anyway." +
                    clrs['n'], verbosity)
                _record_large_pdb_sequences(
                    pdb, subfolder, chain_correspondences, verbosity)
                continue

            structure, chain_correspondences[pdb_code] = pctools.split_states(
                structure)
            _, seqs, chain_ids = pctools.extract_seqs(structure, 0)
            pctools.printv(
                'Number of chains (' + clrs['c'] + str(nchains) + clrs['n'] +
                ') and file size (' + clrs['c'] + str(file_size) + clrs['n'] +
                ') OK.' + clrs['g'] + ' Proceeding.' + clrs['n'] + '\n',
                verbosity)

            # Try to get info from the canonic pdb header (homonimous to pdb1)
            canonpdb = "pdb" + pdb_code + ".ent.gz"
            try:
                contents = pctools.parse_pdb_contents(
                    os.path.join(pdb_archive, subfolder, canonpdb))[1]
            except Exception:
                pctools.printv(
                    clrs['r'] +
                    '\n\n Mismatch between pdb and biounit entries...' +
                    clrs['n'], verbosity)
                # Without the header there is nothing valid to annotate.
                # Carrying on would either hit an unbound name or silently
                # reuse the header of the previous entry, so hand over to
                # the per-entry error handler instead.
                raise
            author, software = pctools.get_annotated_states(contents)
            pctools.printv(
                'Author determined biological unit = ' + str(author),
                verbosity)
            pctools.printv(
                'Software determined quaternary structure= ' + str(software),
                verbosity)

            # Start assessing sequences and structures
            # (from 2 up to max_chains chains)
            if 1 < int(nchains) <= max_chains:
                ids, proteinpair = pctools.get_pairwise_ids(seqs, nchains)
                # Each item is indexed as (identity, chain, chain)
                for pair in ids:
                    color = clrs['g'] if pair[0] >= 90 else clrs['r']
                    pctools.printv(
                        'Identity between chains ' + clrs['y'] +
                        str(pair[1]) + clrs['n'] + ' and ' + clrs['y'] +
                        str(pair[2]) + clrs['n'] + ' is ' + color +
                        str(pair[0]) + "%" + clrs['n'] + ".", verbosity)

                # Save records for pure homo-oligomers
                if all(pair[0] > 90 for pair in ids) and proteinpair is True:
                    pctools.printv(
                        "All identities over 90%. Likely " + clrs['b'] +
                        "homo-oligomeric" + clrs['n'] + ".", verbosity)
                    _store_homo_oligomer(
                        pdb_code, structure, nchains, author, software, seqs,
                        chain_ids, subfolder, log_file, verbosity)

                # Investigate partial homo-oligomers
                elif (any(pair[0] > 90 for pair in ids)
                      and proteinpair is True):
                    at_least_one_interface = False
                    for pair in ids:
                        # Check if similar chains share interfaces
                        if pair[0] > 90 and pctools.check_interfaces(
                                structure, pair[1], pair[2]):
                            at_least_one_interface = True
                            pctools.printv(
                                'Contacts found between chains ' + clrs['g'] +
                                str(pair[1]) + clrs['n'] + ' and ' +
                                clrs['g'] + str(pair[2]) + clrs['n'] +
                                ' sharing ' + clrs['g'] + str(pair[0]) +
                                clrs['n'] + " % identity.", verbosity)
                            pctools.printv(
                                "At least one putative " + clrs['b'] +
                                "homo-oligomeric " + clrs['n'] +
                                "interface found.", verbosity)
                            _store_homo_oligomer(
                                pdb_code, structure, nchains, author,
                                software, seqs, chain_ids, subfolder,
                                log_file, verbosity)
                            # One interface is enough to keep the entry
                            break
                    if at_least_one_interface is False:
                        pctools.printv(
                            "No homo-oligomeric interface found. Likely " +
                            clrs['r'] + "hetero-oligomeric" + clrs['n'] + ".",
                            verbosity)
                        pctools.printv(
                            clrs['y'] + "Recording hetero-oligomer sequence" +
                            clrs['n'], verbosity)
                        # Write in fasta file
                        record_fasta(pdb_code, seqs, chain_ids, subfolder,
                                     type='hetero')

                elif proteinpair is False:
                    pctools.printv(
                        clrs['r'] + "No proteic chain pairs found" +
                        clrs['n'] + ".", verbosity)
                    # A sequence made only of 'X' is not counted as protein
                    if any(set(seq[1]) != {'X'} for seq in seqs):
                        pctools.printv(
                            clrs['y'] + "Protein sequences found though" +
                            clrs['n'], verbosity)
                        pctools.printv(
                            clrs['y'] + "Recording hetero-oligomer sequence" +
                            clrs['n'], verbosity)
                        # Write in fasta file
                        record_fasta(pdb_code, seqs, chain_ids, subfolder,
                                     type='hetero')
                    else:
                        pctools.printv(
                            clrs['r'] +
                            "Not even a single protein chain. Disregarding." +
                            clrs['n'], verbosity)

                else:
                    pctools.printv(
                        "No similar chains found. Likely " + clrs['r'] +
                        "hetero-oligomeric" + clrs['n'] + ".", verbosity)
                    pctools.printv(
                        clrs['y'] + "Recording hetero-oligomer sequence" +
                        clrs['n'], verbosity)
                    record_fasta(pdb_code, seqs, chain_ids, subfolder,
                                 type='hetero')

            elif int(nchains) == 1:
                pctools.printv(
                    "Only one chain found. Likely " + clrs['r'] +
                    "monomeric" + clrs['n'] + ".", verbosity)
                pctools.printv(
                    clrs['y'] + "Recording monomer sequence." + clrs['n'],
                    verbosity)
                structure, chain_correspondences[
                    pdb_code] = pctools.split_states(structure)
                nchains, seqs, chain_ids = pctools.extract_seqs(structure, 0)
                record_fasta(pdb_code, seqs, chain_ids, subfolder,
                             type='mono')

        except BaseException:
            # Deliberately as broad as a bare "except": one faulty entry
            # must not abort the whole curation, and a KeyboardInterrupt is
            # reported before the program quits.
            errtype, errvalue, errtraceback = sys.exc_info()
            errtypeshort = str(errtype).split('\'')[1]
            pctools.printv(
                clrs['r'] + '*' + str(errtypeshort) + ': ' + str(errvalue) +
                ' l.' + str(errtraceback.tb_lineno) + '*' + clrs['n'],
                verbosity)
            traceback.print_exception(*sys.exc_info())
            if isinstance(errvalue, KeyboardInterrupt):
                quit()
            # Append mode creates the error file on first use
            with open(err_file, 'a') as f:
                f.write(filename + '\n')

    with open(chain_correspondences_file, 'wb') as p:
        pickle.dump(chain_correspondences, p,
                    protocol=pickle.HIGHEST_PROTOCOL)

    if not os.path.isfile(err_file):
        with open(err_file, 'w+') as f:
            f.write('\nNo errors. Assessment terminated succesfully.\n')


def _record_large_pdb_sequences(structure_file, subfolder,
                                chain_correspondences, verbosity):
    '''
    Records only the sequences of a structure rejected for full analysis.

    Best effort: any ordinary error is reported through printv and
    swallowed; KeyboardInterrupt is left to propagate.
    chain_correspondences is updated in place.
    '''
    try:
        pdb_code, structure, _ = pctools.parse_pdb_structure(structure_file)
        structure, chain_correspondences[pdb_code] = pctools.split_states(
            structure)
        _, seqs, chain_ids = pctools.extract_seqs(structure, 0)
        pctools.printv(
            clrs['y'] + "Recording large-pdb sequence" + clrs['n'], verbosity)
        # Write in fasta file
        record_fasta(pdb_code, seqs, chain_ids, subfolder, type='largepdb')
    except Exception:
        pctools.printv(
            clrs['r'] + "Failed to fetch sequence!" + clrs['n'], verbosity)


def _store_homo_oligomer(pdb_code, structure, nchains, author, software, seqs,
                         chain_ids, subfolder, log_file, verbosity):
    '''
    Saves a homo-oligomer to the database, logs it and records its sequences.
    '''
    pctools.printv(clrs['y'] + "FETCHING" + clrs['n'] + ".\n", verbosity)

    # Write file to database
    target_dir = os.path.join(pdb_homo_archive, subfolder)
    newfile = os.path.join(target_dir, pdb_code + ".pdb")
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)
    io.set_structure(structure)
    io.save(newfile)
    pctools.gzip_pdb(newfile)

    # Write to log file
    with open(log_file, 'a') as f:
        f.write(
            str(pdb_code) + "," + str(nchains) + "," + '/'.join(author) +
            "," + '/'.join(software) + "," +
            str(os.path.getctime(newfile + '.gz')) + '\n')

    # Write in fasta file
    pctools.printv(
        clrs['y'] + "Recording homo-oligomer sequence." + clrs['n'],
        verbosity)
    record_fasta(pdb_code, seqs, chain_ids, subfolder, type='homo')

Exemple #2

0

Afficher le fichier

def analyse_model(oligomer):
    '''
    Assesses one oligomeric model file and builds its report.

    Depending on the letters present in g_args.assessment, the model is
    scored on interfaces and surface ('I', skipped when monomers are
    allowed), compared to the template with GESAMT ('G') and checked with
    Molprobity ('M'). The partial scores are combined into
    model_report['protchoir_score'], and the report is pickled to
    "<model name>_CHOIR_model_report.pickle" in the working directory.

    Parameters:
        oligomer: path of the model file to analyse.

    Returns:
        (model_report, text) where model_report is a copy of g_report
        extended with the results and text is the accumulated output
        joined with newlines.
    '''
    output = []
    model_report = g_report.copy()
    model_report['model_filename'] = oligomer
    model_oligomer_name = os.path.basename(oligomer).split(
        "_CHOIR_")[0].replace('.', '_')
    output.append(pctools.subsection('3', model_oligomer_name))
    output.append('Analysing oligomer file: ' + clrs['y'] + oligomer +
                  clrs['n'] + '\n')
    model_report['model_oligomer_name'] = model_oligomer_name
    if g_args.generate_report is True:
        model_report['model_figures'], pymol_output = pctools.pymol_screenshot(
            oligomer, g_args, putty=True)
        output.append(pymol_output)

    pdb_name, structure, _ = pctools.parse_any_structure(oligomer)
    _, seqs, _ = pctools.extract_seqs(structure, 0)
    # First element of every sequence record is used as the chain name
    relevant_chains = [seq[0] for seq in seqs]
    _, _, protomer_data = pctools.run_pisa(
        oligomer, '', g_args.verbosity,
        gen_monomer_data=True, gen_oligomer_data=True)
    protomer_surface_residues = pctools.get_areas(protomer_data)
    (model_report['assemblied_protomer_plot'],
     model_report['assemblied_protomer_exposed_area'],
     model_report['assemblied_protomer_hydrophobic_area'],
     model_report['assemblied_protomer_conserved_area'],
     _, _, analysis_output) = pctools.plot_analysis(
         pdb_name, protomer_surface_residues, g_entropies, g_z_entropies,
         g_tmdata, g_args, minx=g_minx, maxx=g_maxx)
    output.append(analysis_output)

    assess_quality = 'M' in g_args.assessment
    assess_interfaces = ('I' in g_args.assessment
                         and not g_args.allow_monomers)
    use_surface = (g_args.sequence_mode is False
                   and g_args.skip_conservation is False)

    if assess_interfaces:
        output.append(
            pctools.subsection('3' + '[I]',
                               'Interfaces Comparison: ' +
                               model_oligomer_name))

        # --- Surface score -------------------------------------------------
        if use_surface:
            # Percent change of each area upon assembly (negative = buried)
            for kind in ('exposed', 'hydrophobic', 'conserved'):
                assembled = float(
                    model_report['assemblied_protomer_' + kind + '_area'])
                isolated = float(model_report['protomer_' + kind + '_area'])
                model_report[kind + '_area_reduction'] = int(
                    100 * (assembled - isolated) / isolated)

            if model_report['exposed_area_reduction'] < -5:
                hydrophobic_surface_score = _capped_surface_score(
                    model_report['hydrophobic_area_reduction'],
                    model_report['exposed_area_reduction'])
                output.append('Hydrophobic surface score: ' +
                              str(hydrophobic_surface_score))
                conserved_surface_score = _capped_surface_score(
                    model_report['conserved_area_reduction'],
                    model_report['exposed_area_reduction'])
                output.append('Conserved surface score: ' +
                              str(conserved_surface_score))
                model_report['surface_score'] = round(
                    (hydrophobic_surface_score + conserved_surface_score) / 2,
                    2)
            else:
                output.append(clrs['r'] + 'Exposed area reduction too small.' +
                              clrs['n'])
                model_report['surface_score'] = 0
            output.append('Final surface score: ' +
                          str(model_report['surface_score']))
        else:
            model_report['surface_score'] = 'NA'

        # --- Interfaces score ----------------------------------------------
        model_oligomer = oligomer.split('_CHOIR_CorrectedChains')[0]
        xml_out = model_oligomer + '_CHOIR_PisaInterfaces.xml'
        # NOTE(review): the text returned by parse_interfaces is never added
        # to the output (it is overwritten by plot_deltas below) - confirm
        # this is intended.
        model_interfaces_list, interfaces_output = pctools.parse_interfaces(
            xml_out, relevant_chains, g_args.verbosity)
        template_interfaces_list = g_interfaces_dict[g_template_hitchain]

        interfaces_comparison = {}
        if model_interfaces_list and template_interfaces_list:
            if g_args.verbosity > 0:
                output.append(clrs['y'] + 'MODEL INTERFACES' + clrs['n'])
                for model_interface in model_interfaces_list:
                    output.append(clrs['y'] +
                                  ' <> '.join(model_interface['chains']) +
                                  clrs['n'])
                    output.append(clrs['y'] + 'Interface Area: ' + clrs['n'] +
                                  str(model_interface['interface area']) +
                                  ' A^2')
                    output.append(
                        clrs['y'] + 'Interface Solvation Energy: ' +
                        clrs['n'] +
                        str(model_interface['interface solvation energy']) +
                        ' kcal/mol')
                    output.append(clrs['y'] + 'Hydrogen Bonds: ' + clrs['n'] +
                                  str(model_interface['hydrogen bonds']))
                    output.append(clrs['y'] + 'Salt Bridges: ' + clrs['n'] +
                                  str(model_interface['salt bridges']))
                    output.append(clrs['y'] + 'Disulphide Bridges: ' +
                                  clrs['n'] +
                                  str(model_interface['disulphide bridges']) +
                                  "\n\n")

            # Compare every model interface with the template interface
            # formed by the same set of chains
            for template_interface in template_interfaces_list:
                for model_interface in model_interfaces_list:
                    if set(model_interface['chains']) == set(
                            template_interface['chains']):
                        interfaces_comparison[''.join(
                            sorted(model_interface['chains']))] = (
                                _compare_interface(model_interface,
                                                   template_interface,
                                                   output))

        if interfaces_comparison:
            comparison_plots, interfaces_output = plot_deltas(
                model_oligomer_name, template, interfaces_comparison, g_args)
            model_report['comparison_plots'] = os.path.basename(
                comparison_plots)
            output.append(interfaces_output)
            summed_score = sum(
                data['score'] for data in interfaces_comparison.values())
            model_report['interfaces_score'] = round(
                summed_score / (10 * len(interfaces_comparison)), 2)
            output.append('Final interfaces score: ' +
                          str(model_report['interfaces_score']))
        else:
            # Nothing to compare: either one of the interface lists is empty
            # or no model interface shares its chains with a template one
            # (which would otherwise divide by zero above).
            if 'surface_score' not in model_report:
                model_report['surface_score'] = 0
            model_report['interfaces_score'] = 0
    else:
        model_report['surface_score'] = 'NA'
        model_report['interfaces_score'] = 'NA'
        model_report['comparison_plots'] = 'NA'
        model_report['assemblied_protomer_exposed_area'] = 'NA'
        model_report['assemblied_protomer_hydrophobic_area'] = 'NA'
        model_report['assemblied_protomer_conserved_area'] = 'NA'

    # --- GESAMT ------------------------------------------------------------
    if 'G' in g_args.assessment:
        output.append(pctools.subsection('3' + '[G]', 'GESAMT Comparison'))
        qscore, rmsd, _, gesamt_output = pctools.run_gesamt(
            template, template_file, model_oligomer_name, oligomer, None,
            g_args)
        output.append(gesamt_output)
        model_report['gesamt_qscore'] = str(qscore)
        model_report['gesamt_rmsd'] = str(rmsd)
    else:
        model_report['gesamt_qscore'] = 'NA'
        model_report['gesamt_rmsd'] = 'NA'

    # --- Molprobity --------------------------------------------------------
    if assess_quality:
        output.append(pctools.subsection('3' + '[M]',
                                         'Molprobity Comparison'))
        model_molprobity, molprobity_output = pctools.run_molprobity(
            oligomer, g_args)
        output.append(molprobity_output)
        model_report['model_clashscore'] = str(model_molprobity['clashscore'])
        model_report['model_molprobity'] = str(
            model_molprobity['molprobity_score'])
        output.append(clrs['y'] + 'MOLPROBITY COMPARISON' + clrs['n'])
        output.append('Criterion\tTempl.\tModel')
        for label, key in (('Rama. Fav.', 'rama_fav'),
                           ('Rama. Out.', 'rama_out'),
                           ('Rot. Out.', 'rot_out'),
                           ('CBeta Dev.', 'cb_dev'),
                           ('Clashscore', 'clashscore'),
                           ('Molprob. Score', 'molprobity_score')):
            output.append(label + '\t' + str(template_molprobity[key]) +
                          '\t' + str(model_molprobity[key]))
        molprobity_radar, radar_output = plot_molprobity(
            model_oligomer_name, model_molprobity, template,
            template_molprobity)
        output.append(radar_output)
        model_report['molprobity_radar'] = molprobity_radar
        delta_clashscore = (model_molprobity['clashscore'] -
                            template_molprobity['clashscore']) / 10
        output.append('Delta clashscore: ' + str(delta_clashscore))
        if delta_clashscore >= 1:
            model_report['quality_score'] = round(
                10 - math.log(delta_clashscore**5, 10), 2)
        else:
            model_report['quality_score'] = 10
        output.append('Final quality score: ' +
                      str(model_report['quality_score']))
    else:
        model_report['model_clashscore'] = 'NA'
        model_report['model_molprobity'] = 'NA'
        model_report['quality_score'] = 'NA'

    # --- Final ProtCHOIR score ---------------------------------------------
    # Interface-based scores only exist when the interface assessment was
    # actually run, i.e. 'I' was requested AND monomers are not allowed;
    # otherwise they hold 'NA' and must not be averaged.
    if assess_quality and assess_interfaces:
        if use_surface:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score'],
                    model_report['quality_score']
                ]) / 3, 2)
        else:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['quality_score']
                ]) / 2, 2)
    elif assess_quality:
        model_report['protchoir_score'] = model_report['quality_score']
    elif assess_interfaces:
        if use_surface:
            model_report['protchoir_score'] = round(
                sum([
                    model_report['interfaces_score'],
                    model_report['surface_score']
                ]) / 2, 2)
        else:
            model_report['protchoir_score'] = model_report['interfaces_score']
    else:
        model_report['protchoir_score'] = 'NA'

    if str(model_report['protchoir_score']) == 'NA':
        model_report['score_color'] = 'grey'
    elif model_report['protchoir_score'] <= 5:
        model_report['score_color'] = 'red'
    elif 5 < model_report['protchoir_score'] <= 7:
        model_report['score_color'] = 'orange'
    elif model_report['protchoir_score'] > 7:
        model_report['score_color'] = 'green'

    # Context manager guarantees the report file is flushed and closed
    with open(model_oligomer_name + '_CHOIR_model_report.pickle',
              'wb') as report_file:
        pickle.dump(model_report, report_file)
    return model_report, '\n'.join(output)


def _capped_surface_score(area_reduction, exposed_area_reduction):
    '''
    Scores an area reduction relative to the exposed-area reduction (max 10).

    A non-negative area_reduction (nothing was buried) scores 0.
    '''
    if area_reduction < 0:
        score = 10 * (area_reduction / exposed_area_reduction) / 3
    else:
        score = 0
    if score > 10:
        score = 10
    return score


def _relative_count(model_count, template_count, delta):
    '''
    Rates a bond count of the model against the template.

    Returns (relative percentage, colour key, counted). counted is False
    when both counts are zero, in which case the criterion is void and
    must be left out of the interface score.
    '''
    if model_count == template_count == 0:
        return 0, 'r', False
    if delta >= 0:
        return 100, 'g', True
    return round(model_count * 100 / template_count, 2), 'r', True


def _compare_interface(model_interface, template_interface, output):
    '''
    Compares a model interface with its template counterpart.

    Appends the comparison text to output and returns the comparison data,
    including the weighted interface 'score'.
    '''
    comparison_data = {}
    deltas = {}
    for key, tag in (('interface area', 'area'),
                     ('interface solvation energy', 'energy'),
                     ('hydrogen bonds', 'hb'),
                     ('salt bridges', 'sb'),
                     ('disulphide bridges', 'ss')):
        deltas[tag] = round(model_interface[key] - template_interface[key], 2)
        comparison_data['model ' + tag] = model_interface[key]
        comparison_data['template ' + tag] = template_interface[key]
        comparison_data['delta ' + tag] = deltas[tag]

    output.append(clrs['y'] + 'INTERFACES COMPARISON' + clrs['n'])
    output.append(' <> '.join(model_interface['chains']))

    # Interface area: at least as large as the template is a full score
    if deltas['area'] >= 0:
        emphasis_color = clrs['g']
        relative_area = 100
    else:
        emphasis_color = clrs['r']
        relative_area = round(
            model_interface['interface area'] * 100 /
            template_interface['interface area'], 2)
    output.append('Delta Interface Area: ' + emphasis_color +
                  str(deltas['area']) + clrs['n'] + ' A^2 (' +
                  str(relative_area) + '%)')

    # Solvation energy: at most the template's energy is a full score
    model_energy = model_interface['interface solvation energy']
    template_energy = template_interface['interface solvation energy']
    if deltas['energy'] <= 0:
        emphasis_color = clrs['g']
        relative_energy = 100
    else:
        emphasis_color = clrs['r']
        if model_energy < 0 and template_energy < 0:
            relative_energy = round(model_energy * 100 / template_energy, 2)
        elif model_energy < 0 and template_energy > 0:
            relative_energy = 100
        else:
            # Model energy is not favourable (>= 0), or the template energy
            # is exactly zero: no credit. This also covers the zero cases
            # that previously left relative_energy unassigned.
            relative_energy = 0
    output.append('Delta Interface Solvation Energy: ' + emphasis_color +
                  str(deltas['energy']) + clrs['n'] + ' kcal/mol (' +
                  str(relative_energy) + '%)')

    # Weighted mean; weights are area 1, energy 2, H-bonds 2, salt
    # bridges 3, disulphides 4. Area and energy always count, so the
    # denominator can never drop below 3.
    weighted_sum = relative_area + 2 * relative_energy
    denominator = 12
    for key, tag, label, weight, tail in (
            ('hydrogen bonds', 'hb', 'Delta Hydrogen Bonds: ', 2, '%)'),
            ('salt bridges', 'sb', 'Delta Salt Bridges: ', 3, '%)'),
            ('disulphide bridges', 'ss', 'Delta Disulphide Bridges: ', 4,
             '%)\n')):
        relative, color_key, counted = _relative_count(
            model_interface[key], template_interface[key], deltas[tag])
        if not counted:
            denominator -= weight
        weighted_sum += weight * relative
        output.append(label + clrs[color_key] + str(deltas[tag]) +
                      clrs['n'] + ' (' + str(relative) + tail)

    comparison_data['score'] = round(weighted_sum / denominator, 2)
    output.append('Interface score: ' + str(comparison_data['score']))
    return comparison_data