def jointinfo(joint, rep='CE'):
    """
    Print summary statistics for every occurrence of a joint in the data.

    For each (rp, j) pair yielded by joint_locations(joint), the parameters
    belonging to that joint are gathered from simulations 1 to 100 in the
    given representation. One table row is then printed per parameter: its
    name, its mean, and its 5th and 95th percentiles. The statistics are
    computed separately for each occurrence, over its 100 simulations.

    Parameters:
        joint: name of the joint, e.g. 'D18 D18_j1_D14 CcapD14'.
        rep:   representation identifier, passed through unchanged to the
               helper functions (default 'CE').

    Returns:
        None. The results are printed to standard output.

    e.g. try jointinfo('D18 D18_j1_D14 CcapD14')
    """
    print('\nJoint: {}'.format(joint))
    num_params = param_split(rep)
    for rp, j in joint_locations(joint):
        print('\nrp{}\n'.format(rp))
        print('{:12} {:10} {:10}'.format('Parameter', 'Mean', 'Range'))
        names = joint_parameter_names(j, rep)
        # The index array depends only on (j, rep), so build it once per
        # occurrence instead of once per simulation.
        locations = np.array(joint_parameter_locations(j, rep))
        # One row per simulation, one column per parameter of this joint.
        params = np.stack([get_parameters(rp, sim, rep)[locations]
                           for sim in range(1, 101)])
        for i in range(np.shape(params)[1]):
            column = params[:, i]
            # Columns below num_params[0] get ordinary statistics; the
            # remaining columns go through the angle-specific helpers.
            # NOTE(review): num_params comes from param_split(rep) with no
            # joint argument, while i indexes this joint's own columns --
            # confirm the threshold is meant to be applied to the joint-local
            # index.
            if i < num_params[0]:
                mean = np.mean(column)
                fifth, ninetyfifth = np.percentile(column, [5, 95])
            else:
                mean = anglemean(column)
                fifth, ninetyfifth = anglepercentiles(column)
            print('{:12} {:<10.3f} ({:.3f},{:.3f})'.format(
                names[i], mean, fifth, ninetyfifth))
def jointdistribution(joint, parameter, rep='CE'):
    """
    Collect the values of one named parameter over every occurrence of a
    joint in the data.

    The position of the parameter is looked up once with
    parameter_position(parameter, rep). For each occurrence yielded by
    joint_locations(joint), the value at that position is taken from
    simulations 1 to 100.

    Parameters:
        joint:     name of the joint, e.g. 'D18 D18_j1_D14 CcapD14'.
        parameter: name of the parameter, e.g. 'Length 1'.
        rep:       representation identifier passed to the helpers
                   (default 'CE').

    Returns:
        A numpy array holding 100 values per occurrence, ordered by
        occurrence and then by simulation number.

    e.g. jointdistribution('D18 D18_j1_D14 CcapD14', 'Length 1')
    -> 200 data points when the joint occurs twice in the data

    NOTE(review): the joint index returned by joint_locations is not used
    here; the same position is read for every occurrence -- confirm this is
    intended.
    """
    index = parameter_position(parameter, rep)
    values = [
        get_parameters(rp, sim, rep)[index]
        for rp, _ in joint_locations(joint)
        for sim in range(1, 101)
    ]
    return np.array(values)
def newproteindistributions(protein):
    """
    Get the distributions of all parameters for a new protein sequence,
    using the 'CE' representation.

    The protein is split on whitespace into modules, and every run of three
    consecutive modules is treated as a joint name. For each joint, the
    joint's parameters are gathered from simulations 1 to 100 of every
    occurrence yielded by joint_locations. The pooled values are then
    appended to the lists for the positions that joint occupies in the new
    protein.

    Parameters:
        protein: whitespace-separated module names, e.g.
                 'NcapD14 D14_j4_D79 D79_j2_D14 D14_j2_D79 CcapD79'.

    Returns:
        A list of lists, one per parameter of the new protein (the first is
        'Length 1' etc.), each containing all the data points relevant to
        that parameter.

    Raises:
        ValueError: if joint_locations yields no occurrence for one of the
                    protein's joints, so there is no data to draw from.

    e.g. newproteindistributions(
             'NcapD14 D14_j4_D79 D79_j2_D14 D14_j2_D79 CcapD79')
    """
    modules = protein.split()
    num_modules = len(modules)
    # construct a list of the joint names (sliding window of three modules)
    joints = [' '.join(modules[i:i + 3]) for i in range(num_modules - 2)]
    # numbers of lengths, angles and dihedrals for new protein
    num_params = param_split('CE', num_modules)
    # one empty list per parameter of the new protein
    param_dists = [[] for _ in range(sum(num_params))]
    for joint_num, joint_name in enumerate(joints):
        samples = []
        for rp, j in joint_locations(joint_name):
            # The index array depends only on j, so build it once per
            # occurrence instead of once per simulation.
            locations = np.array(joint_parameter_locations(j, 'CE'))
            # keep only the parameters relevant to this joint
            samples.extend(get_parameters(rp, sim, 'CE')[locations]
                           for sim in range(1, 101))
        if not samples:
            # np.stack would fail here with an opaque "need at least one
            # array to stack"; raise the same exception type, but name the
            # joint that has no data.
            raise ValueError(
                'no data found for joint {!r}'.format(joint_name))
        # matrix whose columns contain the distribution of each parameter
        params = np.stack(samples)
        # append the parameters in the correct place in the new protein
        param_locs = joint_parameter_locations(joint_num, 'CE',
                                               protein_length=num_modules)
        for column, location in enumerate(param_locs):
            param_dists[location].extend(params[:, column])
    return param_dists