Esempio n. 1
0
def read_train_data():
    """Read the protein/ligand training pairs and return them.

    For every index in the loop range, the files
    ``training_data/<NNNN>_pro_cg.pdb`` and ``training_data/<NNNN>_lig_cg.pdb``
    (``NNNN`` is the zero-padded, four-digit index) are parsed with
    ``read_pdb`` and the results are collected in two parallel lists.

    Returns:
        tuple: ``(pro_list, lig_list)`` where entry ``k`` of each list is
        whatever ``read_pdb`` returned for the k-th protein / ligand file.
    """
    pro_list = []
    lig_list = []
    # NOTE(review): range(1, 2) only covers index 1 (file 0001) -- confirm
    # whether a larger upper bound was intended.
    for index in range(1, 2):
        digit = format(index, '04d')
        pro = read_pdb('training_data/' + digit + '_pro_cg.pdb')
        lig = read_pdb('training_data/' + digit + '_lig_cg.pdb')
        pro_list.append(pro)
        lig_list.append(lig)
    # Previously the lists were built and then dropped (implicit ``None``);
    # hand them back so the parsed data is actually usable by the caller.
    return pro_list, lig_list
Esempio n. 2
0
def read_raw_data(n, file_type):
    """Load the first ``n`` training files of the requested kind.

    Args:
        n: Number of files to read; must lie between 1 and 3000 inclusive.
        file_type: Either ``"lig"`` or ``"pro"``; selects which file of each
            numbered pair is read.

    Returns:
        A list with one ``[X_list, Y_list, Z_list, atomtype_list]`` entry per
        file, in file-number order. When an argument is invalid, no file is
        read and a human-readable message string is returned instead (the
        count is checked before the file type).
    """
    count_is_invalid = (n < 1) or (n > 3000)
    if count_is_invalid:
        return "Please provide a valid number of files between 1 to 3000"
    if file_type not in ("lig", "pro"):
        return "Please provide a valid file type which should be 'lig' or 'pro'"

    records = []
    file_index = 1
    while file_index <= n:
        # Left-pad the index with zeros to four characters, e.g. 7 -> "0007".
        padded = ("0000" + str(file_index))[-4:]
        pdb_name = "{}_{}_cg.pdb".format(padded, file_type)
        pdb_path = "./training_data/{}".format(pdb_name)
        xs, ys, zs, atom_types = read_pdb(pdb_path)
        records.append([xs, ys, zs, atom_types])
        file_index += 1
    return records
def plot_distribution_range_xyz():
    """Plot each protein's coordinate extent per axis against its atom count.

    Protein files 0001..3000 under ``../data/training_data/`` are parsed with
    ``read_pdb``. For every file the number of X coordinates and the
    max-minus-min spread of the X, Y and Z coordinate lists are recorded. The
    three spreads are then drawn as side-by-side scatter plots (one subplot
    per axis) and the figure is shown. The file index is printed while
    loading, as a progress indicator.
    """
    atom_counts = []
    x_spans = []
    y_spans = []
    z_spans = []
    for file_index in range(1, 3001):
        print(file_index)
        pdb_path = '../data/training_data/%s_pro_cg.pdb' % str(file_index).zfill(4)
        X_list, Y_list, Z_list, atomtype_list = read_pdb(pdb_path)
        atom_counts.append((len(X_list)))
        x_spans.append(max(X_list) - min(X_list))
        y_spans.append(max(Y_list) - min(Y_list))
        z_spans.append(max(Z_list) - min(Z_list))

    fig = plt.figure(dpi=128, figsize=(12, 6))
    fig.autofmt_xdate()
    fig.suptitle('Spatial Range VS Number of Atom within a Protein')

    # One panel per axis: (subplot position, title, y-axis label, values).
    panels = (
        (131, 'X Range VS Number of Atoms', 'X', x_spans),
        (132, 'Y Range VS Number of Atoms', 'Y', y_spans),
        (133, 'Z Range VS Number of Atoms', 'Z', z_spans),
    )
    for position, title, axis_label, spans in panels:
        ax = plt.subplot(position)
        ax.set_title(title)
        ax.set_ylabel(axis_label)
        ax.set_xlabel('The Numbers of Atoms within a Protein')
        ax.scatter(atom_counts, spans, label='Protein')
        # pyplot-level calls act on the subplot that was just created.
        plt.legend()
        plt.grid(True)

    plt.tight_layout()
    # Leave headroom so the figure-level title does not overlap the panels.
    plt.subplots_adjust(top=0.88)
    plt.show()
def extract_data(i, type):
    """Return the contents of one training file as a 2-D array.

    Args:
        i: File number; it is zero-padded to four digits to build the name.
        type: Middle part of the file name (``<NNNN>_<type>_cg.pdb``).
            NOTE(review): shadows the builtin ``type``; kept because it is
            part of the public signature.

    Returns:
        numpy.ndarray: the transpose of the stacked
        ``[x_list, y_list, z_list, atomtype_list]`` rows, i.e. one row per
        entry of the lists with the four values as columns (assuming
        ``read_pdb`` returns four equal-length lists -- confirm).
    """
    pdb_path = '../data/training_data/%s_%s_cg.pdb' % (str(i).zfill(4), type)
    xs, ys, zs, atom_types = read_pdb(pdb_path)
    stacked = np.array([xs, ys, zs, atom_types])
    per_atom = np.transpose(stacked)
    # The final np.array call copies the transposed view into a new array.
    return np.array(per_atom)