if len(SCORE_RESULTS[clf]) >= i + 1: outline += str(SCORE_RESULTS[clf][i]) outline += ',' outfile.write(outline + '\n') print('Done.') def draw_graphs(): print('Making histograms...') for clf in CLASSIFICATIONS: plt.hist(SCORE_RESULTS[clf], bins=100) plt.title('Molprobity ' + clf) ax = plt.gca() ax.set_ylabel('Count') ax.set_xlabel('Clipper Score') ax.set_yscale('log') plt.savefig(os.path.join(TESTING_OUTPUT_DIR, 'rama_hist_' + clf.lower() + '.png'), dpi=600) plt.close() if __name__ == '__main__': setup() PDB_IDS = get_available_pdb_ids() PDB_REPORT_DATA = load_pdb_report_data() ramalyze_all() export_results() draw_graphs() cleanup_all_pdb_redo_dirs()
# NOTE(review): the next line is the whitespace-collapsed tail of a definition
# that begins before this chunk (its leading `elif` pairs with an `if` that is
# not visible). It is kept verbatim because its nesting cannot be recovered
# with confidence from here.
#
# What the visible statements do:
#   * append IDs to one of three lists, shortlists[0..2], and print the size
#     of each as 'Unchecked IDs', 'Successes to retry', 'Failures to retry';
#   * flatten the three lists, in that order, into compiled_shortlist;
#   * for each ID, GET PDB_REDO_URL + pdb_id, store the HTTP status code in
#     PDB_REDO_RECORD[pdb_id], print a progress line, and put the ID on
#     `queue` when the status is 200;
#   * whenever i is a multiple of 10, or is the last index, rewrite
#     PDB_REDO_RECORD_PATH with one 'id,status' line per entry of pdb_ids.
#
# NOTE(review): the innermost `for pdb_id in pdb_ids` reuses the name of the
# enclosing loop variable -- looks harmless if it is the last statement of the
# iteration, but confirm once the formatting is restored.
# NOTE(review): requests.get is called without a timeout argument.
# NOTE(review): the record write indexes PDB_REDO_RECORD for every ID in
# pdb_ids -- confirm each of them has an entry by then.
shortlists[0].append(pdb_id) elif status == 200: if [ os.path.exists(os.path.join(PDB_REDO_DATA_DIR, pdb_id, pdb_id + suffix + '.gz')) for suffix in PDB_REDO_SUFFIXES ].count(True) < 4: shortlists[1].append(pdb_id) elif retry_failed_ids: shortlists[2].append(pdb_id) print('Unchecked IDs:', len(shortlists[0])) print('Successes to retry:', len(shortlists[1])) print('Failures to retry:', len(shortlists[2])) compiled_shortlist = [ pdb_id for shortlist in shortlists for pdb_id in shortlist ] shortlist_length = len(compiled_shortlist) # For each PDB ID in the shortlist, check if it exists in PDB-REDO, and if so, add it to the download queue for i, pdb_id in enumerate(compiled_shortlist): response = requests.get(PDB_REDO_URL + pdb_id) PDB_REDO_RECORD[pdb_id] = response.status_code print(pdb_id + ' ' + str(response.status_code) + ' ::: ' + str(i) + ' / ' + str(shortlist_length)) if response.status_code == 200: queue.put(pdb_id) # Every 10 codes, write out the record file if i % 10 == 0 or i == shortlist_length-1: with open(PDB_REDO_RECORD_PATH, 'w') as outfile: for pdb_id in pdb_ids: outfile.write(pdb_id + ',' + str(PDB_REDO_RECORD[pdb_id]) + '\n')


# Script entry point: the steps run strictly in this order.
if __name__ == '__main__':
    setup()
    # Bound as a module-level name.
    # NOTE(review): presumably read as a global by scrape_pdb_redo() --
    # confirm against its definition.
    PDB_REPORT_VALUES = load_pdb_report_data()
    scrape_pdb_redo()
    print('Done.')