Ejemplo n.º 1
0
def jointinfo(joint, rep='CE'):
    """
    Print summary statistics for every occurrence of a joint in the data.

    For each location returned by ``joint_locations(joint)``, the parameters
    belonging to the joint are gathered from simulations 1 to 100 and a table
    is printed with one row per parameter: its name, its mean and a range.
    For the first ``param_split(rep)[0]`` columns the mean is ``np.mean`` and
    the range is the 5th and 95th percentiles; for the remaining columns
    ``anglemean`` and ``anglepercentiles`` are used instead. The statistics
    are computed separately for each location.

    Parameters
    ----------
    joint : str
        Name of the joint, as understood by ``joint_locations``.
    rep : str, optional
        Representation passed through to the parameter helpers.

    Returns
    -------
    None
        The tables are written to stdout.

    e.g. try
    jointinfo('D18 D18_j1_D14 CcapD14')
    """
    print('\nJoint: {}'.format(joint))

    # NOTE(review): the column index below is local to the joint, while
    # param_split(rep) is called without a joint -- confirm that its first
    # entry really is the number of non-angular columns of a single joint.
    num_linear = param_split(rep)[0]

    for rp, j in joint_locations(joint):
        print('\nrp{}\n'.format(rp))
        print('{:12} {:10} {:10}'.format('Parameter', 'Mean', 'Range'))
        names = joint_parameter_names(j, rep)

        # The indices of this joint's parameters do not depend on the
        # simulation, so look them up once per location rather than once
        # per simulation.
        locs = np.array(joint_parameter_locations(j, rep))

        # One row per simulation, one column per joint parameter.
        params = np.stack(
            [get_parameters(rp, sim, rep)[locs] for sim in range(1, 101)])

        for i in range(params.shape[1]):
            column = params[:, i]
            if i < num_linear:
                mean = np.mean(column)
                fifth, ninetyfifth = np.percentile(column, [5, 95])
            else:
                # remaining columns are handled by the angle-specific helpers
                mean = anglemean(column)
                fifth, ninetyfifth = anglepercentiles(column)
            print('{:12} {:<10.3f} ({:.3f},{:.3f})'.format(
                names[i], mean, fifth, ninetyfifth))
def jointdistribution(joint, parameter, rep='CE'):
    """
    Collect the values of one parameter over every occurrence of a joint.

    The parameter's index is obtained from
    ``parameter_position(parameter, rep)``. For every location returned by
    ``joint_locations(joint)`` and every simulation from 1 to 100, the value
    at that index of ``get_parameters(rp, sim, rep)`` is recorded.

    Returns a NumPy array holding 100 values per location, grouped by
    location in the order ``joint_locations`` yields them.

    e.g.
    jointdistribution('D18 D18_j1_D14 CcapD14', 'Length 1') ->
    200 data points if the joint occurs twice in the data
    """
    index = parameter_position(parameter, rep)
    # Only the run identifier of each location is needed here; the joint's
    # position within the run is ignored.
    values = [
        get_parameters(rp, sim, rep)[index]
        for rp, _ in joint_locations(joint)
        for sim in range(1, 101)
    ]
    return np.array(values)
def newproteindistributions(protein):
    """
    Get the distributions of all parameters for a new protein sequence,
    using the 'CE' representation.

    The sequence is split on whitespace into modules, and every run of three
    consecutive modules is treated as a joint. For each joint, the joint's
    parameters are gathered from simulations 1 to 100 at every location
    returned by ``joint_locations``. They are then appended to the slots given
    by ``joint_parameter_locations(joint_num, 'CE', protein_length=...)``.

    Parameters
    ----------
    protein : str
        Whitespace-separated module names of the new protein.

    Returns
    -------
    list of list
        One list per parameter of the new protein, containing all the data
        points gathered for that parameter. There are
        ``sum(param_split('CE', number_of_modules))`` lists in total.

    Raises
    ------
    ValueError
        If ``joint_locations`` yields no location for one of the joints, so
        there is no data to build its distribution from.

    e.g.
    newproteindistributions('NcapD14 D14_j4_D79 D79_j2_D14 D14_j2_D79 CcapD79') ->
    a list of lists, each containing all the data points relevant to a single
    parameter.
    """
    modules = protein.split()
    num_modules = len(modules)

    # each joint is named by three consecutive modules
    joints = [' '.join(modules[i:i+3]) for i in range(num_modules - 2)]

    # numbers of lengths, angles and dihedrals for new protein
    num_params = param_split('CE', num_modules)

    # one (initially empty) list of data points per parameter
    param_dists = [[] for _ in range(sum(num_params))]

    for joint_num, joint_name in enumerate(joints):
        samples = []
        for rp, j in joint_locations(joint_name):
            # The joint's parameter indices are the same for every
            # simulation, so look them up once per location.
            locs = np.array(joint_parameter_locations(j, 'CE'))
            samples.extend(
                get_parameters(rp, sim, 'CE')[locs] for sim in range(1, 101))

        if not samples:
            # np.stack would fail here with an opaque "need at least one
            # array to stack"; name the joint that has no data instead.
            raise ValueError(
                'no occurrences of joint {!r} found in the data'.format(
                    joint_name))

        # matrix whose columns contain the distribution of each parameter
        params = np.stack(samples)

        # append the parameters in the correct place in the new protein
        param_locs = joint_parameter_locations(joint_num,
                                               'CE',
                                               protein_length=num_modules)
        for column, loc in enumerate(param_locs):
            param_dists[loc].extend(params[:, column])

    return param_dists