def cs_load_spreadsheets(workingdir, groupdict):
    # NOTE(review): this definition is immediately shadowed by an identical
    # redefinition of cs_load_spreadsheets just below (apparent merge/paste
    # duplicate), so this copy is dead code -- confirm and remove one.
    """Attach EZ-beta 'DeltaBurial' spreadsheets to the groups in groupdict.

    Reads the 'dan_ezbeta' phrasebook from <workingdir>/misc/phrasebooks.csv
    and passes it to load_spreadsheets for files matching
    '(.*)_DeltaBurial.csv$' under '<workingdir>/ez beta data'; presumably
    each match is stored on its group as the 'ez_data' attribute (see
    load_spreadsheets for exact semantics).
    """
    phrasebooks = biodata.phrasebooks(workingdir + '/misc/phrasebooks.csv')
    ezbeta_phrasebook = phrasebooks['dan_ezbeta']
    load_spreadsheets(groupdict, workingdir + \
                      '/ez beta data',
                      ['(.*)_DeltaBurial.csv$'],phrasebook = ezbeta_phrasebook,
                      attribute_name = 'ez_data')
def cs_load_spreadsheets(workingdir, groupdict):
    """Attach EZ-beta 'DeltaBurial' spreadsheets to the groups in groupdict.

    Reads the 'dan_ezbeta' phrasebook from <workingdir>/misc/phrasebooks.csv
    and passes it to load_spreadsheets for files matching
    '(.*)_DeltaBurial.csv$' under '<workingdir>/ez beta data'; presumably
    each match is stored on its group as the 'ez_data' attribute (see
    load_spreadsheets for exact semantics).
    """
    phrasebooks = biodata.phrasebooks(workingdir + "/misc/phrasebooks.csv")
    ezbeta_phrasebook = phrasebooks["dan_ezbeta"]
    load_spreadsheets(
        groupdict,
        workingdir + "/ez beta data",
        ["(.*)_DeltaBurial.csv$"],
        phrasebook=ezbeta_phrasebook,
        attribute_name="ez_data",
    )


def filename(pdbid):
    """Return the path of Daniel's aligned structure file for *pdbid*.

    BUG FIX: in the original, this `return` line had lost its enclosing
    `def filename(pdbid):` header (merge/paste artifact) and dangled at the
    end of cs_load_spreadsheets, where neither `structure_dir` nor `pdbid`
    exists -- any call would have raised NameError.  The structure-loading
    loop below calls filename(group.name), so the header is restored here.
    `structure_dir` is assumed to be a module-level global defined elsewhere
    in this file -- TODO confirm.
    """
    return structure_dir + '/aligned_{}.pdb'.format(pdbid)
# Daniel's aligned structures emit "invalid/missing occupancy" and
# "invalid/missing B factor" warnings by the thousands; suppress them,
# otherwise the structures won't get loaded.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    for entry in groupdict.values():
        entry.structure = PDBParser().get_structure(
            entry.name, filename(entry.name))

print(f'structures loaded after {time.time() - start}')
afterstructures = time.time()

# Open the asymmetric ezbeta spreadsheets used for retrieving DSSP results
# and the residue numbers of the residues included in the dataset
phrasebooks = biodata.phrasebooks('weights phrasebook.csv')
weights_phrasebook = phrasebooks['weights']
for group in groupdict.values():
    csv_name = group.name.upper() + '.csv'
    # Each group may have a spreadsheet in either/both of the two residue
    # directories; attach whichever exists under the matching attribute.
    for directory, attr in (('non ppi residues', 'non_ppi_data'),
                            ('ppi residues', 'ppi_data')):
        if csv_name in os.listdir(directory):
            setattr(group, attr,
                    biodata.Spreadsheet(directory + '/' + csv_name,
                                        phrasebook=weights_phrasebook))

print(f'loading spreadsheets took {time.time() - afterstructures}')

# Instead of select_by_predicate, selections will be created by