def cs_load_spreadsheets(workingdir, groupdict):
    """Attach EZ-beta spreadsheet data to every group in *groupdict*.

    Reads the phrasebook CSV under ``<workingdir>/misc`` and then loads each
    ``<name>_DeltaBurial.csv`` file found under ``<workingdir>/ez beta data``,
    storing the resulting spreadsheet on the matching group under the
    attribute name ``ez_data``.

    NOTE(review): an identical definition of this function appears again
    later in this file and will shadow this one at import time — confirm
    which copy is intended to survive.
    """
    phrasebook_path = workingdir + '/misc/phrasebooks.csv'
    ezbeta_phrasebook = biodata.phrasebooks(phrasebook_path)['dan_ezbeta']
    load_spreadsheets(
        groupdict,
        workingdir + '/ez beta data',
        ['(.*)_DeltaBurial.csv$'],
        phrasebook=ezbeta_phrasebook,
        attribute_name='ez_data',
    )
def cs_load_spreadsheets(workingdir, groupdict):
    """Load the EZ-beta DeltaBurial spreadsheets onto each group.

    Looks up the ``dan_ezbeta`` phrasebook in
    ``<workingdir>/misc/phrasebooks.csv``, then scans
    ``<workingdir>/ez beta data`` for files matching
    ``(.*)_DeltaBurial.csv$`` and attaches each parsed spreadsheet to its
    group as the ``ez_data`` attribute.
    """
    books = biodata.phrasebooks(workingdir + "/misc/phrasebooks.csv")
    load_spreadsheets(
        groupdict,
        workingdir + "/ez beta data",
        ["(.*)_DeltaBurial.csv$"],
        phrasebook=books["dan_ezbeta"],
        attribute_name="ez_data",
    )
# NOTE(review): this chunk begins mid-function — the `return` below is the
# tail of an enclosing helper (presumably something like `filename(pdbid)`)
# whose `def` line is outside this view. `structure_dir`, `pdbid`, `start`,
# `groupdict`, `biodata`, `PDBParser`, and `time` are all defined elsewhere
# in the file — confirm against the full source.
    return structure_dir + '/aligned_{}.pdb'.format(pdbid)

# Daniel's aligned structures give "invalid/missing occupancy" and
# "invalid/missing B factor" warnings - thousands of them! Have to filter
# warnings or the structures won't get loaded
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    # Parse each group's aligned PDB file into a structure object stored
    # directly on the group.
    for group in groupdict.values():
        group.structure = PDBParser().get_structure(group.name,
                                                    filename(group.name))
print('structures loaded after ' + str(time.time() - start))
afterstructures = time.time()

# Open the asymmetric ezbeta spreadsheets used for retrieving DSSP results
# and the residue numbers of the residues included in the dataset
phrasebooks = biodata.phrasebooks('weights phrasebook.csv')
weights_phrasebook = phrasebooks['weights']
for group in groupdict.values():
    # Spreadsheet files are keyed by the upper-cased group name; a group may
    # have a non-PPI sheet, a PPI sheet, both, or neither — missing files are
    # simply skipped.
    csv_name = group.name.upper() + '.csv'
    if csv_name in os.listdir('non ppi residues'):
        group.non_ppi_data = biodata.Spreadsheet('non ppi residues/'
                                                 + csv_name,
                                                 phrasebook=weights_phrasebook)
    if csv_name in os.listdir('ppi residues'):
        group.ppi_data = biodata.Spreadsheet('ppi residues/'
                                             + csv_name,
                                             phrasebook=weights_phrasebook)
print('loading spreadsheets took ' + str(time.time() - afterstructures))
# Instead of select_by_predicate, selections will be created by