pdbids.remove('2O4V')

# Make group objects. I'm going to be associating a lot of stuff with
# each protein, it's easiest to just group them together.
# One Group per remaining PDB id, keyed by that id.
groupdict = CIDict([(pdbid, Group(pdbid)) for pdbid in pdbids])

# The slow part: load structures.
structure_dir = '../pymol/structures'


def filename(pdbid):
    """Return the path of the aligned PDB file for `pdbid` in structure_dir."""
    return structure_dir + '/aligned_{}.pdb'.format(pdbid)


# Daniel's aligned structures give "invalid/missing occupancy" and
# "invalid/missing B factor" warnings - thousands of them! Have to filter
# warnings or the structures won't get loaded
with warnings.catch_warnings():
    # Every warning raised while parsing is silenced; the previous filter
    # state is restored when the `with` block exits.
    warnings.simplefilter('ignore')
    for group in groupdict.values():
        group.structure = PDBParser().get_structure(group.name,
                                                    filename(group.name))

# `start` is not defined in this section; presumably a time.time() stamp
# taken earlier in the file - TODO confirm.
print('structures loaded after ' + str(time.time() - start))
afterstructures = time.time()

# Open the asymmetric ezbeta spreadsheets used for retrieving DSSP results
# and the residue numbers of the residues included in the dataset
phrasebooks = biodata.phrasebooks('weights phrasebook.csv')
weights_phrasebook = phrasebooks['weights']

# NOTE(review): the loop below is cut off in the middle of the
# biodata.Spreadsheet(...) call in the visible source. It is kept here as a
# comment rather than completed by guesswork; turn it back into code
# together with the rest of the call once the continuation is available.
#
# for group in groupdict.values():
#     csv_name = group.name.upper() + '.csv'
#     if csv_name in os.listdir('non ppi residues'):
#         group.non_ppi_data=biodata.Spreadsheet('non ppi residues/'\
#                                                + csv_name,