Esempio n. 1
0
def scatter(rep_num, h5_path_regx):
    """
    load the track of one replica and hand its energy / mcc values to plotScatter

    rep_num and h5_path_regx are forwarded unchanged to loadTrack
    mcc is passed as the first argument of plotScatter, energy as the second,
    followed by the file name 'linear_scatter_ener_mcc.pdf'
    """
    # single pass over the track: field 1 goes to ener, field 2 goes to mcc
    pairs = [(record[1], record[2]) for record in loadTrack(rep_num, h5_path_regx)]
    ener = [pair[0] for pair in pairs]
    mcc = [pair[1] for pair in pairs]

    plotScatter(mcc, ener, 'linear_scatter_ener_mcc.pdf')
Esempio n. 2
0
def pearsonrMccEner(rep_num, h5_path_regx):
    """
    load the track of replica rep_num and correlate mcc with energy

    returns whatever pearsonr(mcc, ener) returns, where ener is field 1 and
    mcc is field 2 of every record produced by loadTrack
    """
    energies = []
    coefficients = []

    for record in loadTrack(rep_num, h5_path_regx):
        energies += [record[1]]
        coefficients += [record[2]]

    correlation = pearsonr(coefficients, energies)
    return correlation
Esempio n. 3
0
def extractTrack(complx, track_path):
    """
    collect the track of every replica of a complex into "<complx>.h5"

    complx     -- name of the complex; its directory is expected to contain a
                  "report" file and the raw "out*/*.h5" files
    track_path -- path of the hdf5 group that receives one dataset per replica

    each dataset is named after the replica number and holds, column-wise:
    step, mcc, total energy, energy components, move vector, protein
    conformation index, ligand conformation index

    the output file is opened once in append mode, so the file and the group
    are created up front and existing content is kept; writing a replica whose
    dataset already exists is still rejected by h5py

    NOTE(review): the previous docstring said duplicated data points are
    removed; nothing in this function de-duplicates, presumably that happens
    inside readH5.loadTrack -- confirm
    """
    h5_fn = complx + ".h5"
    dir_path = os.path.abspath(complx)
    report_path = os.path.join(dir_path, "report")
    h5_path_regx = os.path.join(dir_path, "out*", "*.h5")

    # the replica count and the temperature list are reported but not needed here
    num_prt, num_temp, num_lig, _num_rep, _temps = readReport(report_path)

    print(complx)

    # explicit "a" mode: recent h5py versions open read-only by default, which
    # would make every write below fail; the context manager guarantees the
    # file is closed even when loading or writing a replica raises
    with h5py.File(h5_fn, "a") as f:
        group = f.require_group(track_path)

        for prt_conf in range(num_prt):
            for lig_conf in range(num_lig):
                for tmp_conf in range(num_temp):
                    # flat replica index derived from the three configuration indices
                    rep_num = num_temp * num_lig * prt_conf + num_lig * tmp_conf + lig_conf
                    track = readH5.loadTrack(rep_num, h5_path_regx)

                    step = np.array([each_step[0] for each_step in track])
                    total_energy = np.array([each_step[1] for each_step in track])
                    mcc = np.array([each_step[2] for each_step in track])
                    move_vector = np.array([each_step[3] for each_step in track])
                    components = np.array([each_step[4] for each_step in track])
                    # one (prt_conf, lig_conf) row per step; np.tile also copes
                    # with an empty track, where stacking a generator would fail
                    confs = np.tile(np.array([prt_conf, lig_conf]), (len(step), 1))

                    dt = np.column_stack((step, mcc, total_energy, components, move_vector, confs))

                    group.create_dataset(str(rep_num), data=dt)
                    # NOTE: these attributes live on the group, so they always
                    # describe the replica written last
                    group.attrs["prt_conf"] = prt_conf
                    group.attrs["lig_conf"] = lig_conf
                    # persist each replica as soon as it is written
                    f.flush()
Esempio n. 4
0
def pearsonrMccEner(rep_num, h5_path_regx, intercept=1, floor_mcc=0.35, ceil_mcc=0.65):
    """
    read the hdf5 file and calculate the pearsonr correlation coefficient between mcc and energy
    """
    track = loadTrack(rep_num, h5_path_regx)
    if intercept == 1:
        intercepted_track = interceptTrack(floor_mcc, ceil_mcc, track)
        track = intercepted_track

    ener, mcc = [], []

    for i in track:
        ener.append(i[1])
        mcc.append(i[2])
        
    if 10000 in mcc:  # drop the mcc with CMCC_INVALID_VAL
        print "CMCC_INVALID_VAL found in", rep_num
        return np.nan, np.nan
    else:
        return pearsonr(mcc, ener)
Esempio n. 5
0
def extractMccTotalEner(complx):
    """
    extract mcc and total energy from the docking tracing
    store the data in hdf5 file

    complx -- name of the complex; its directory is expected to contain a
              "report" file and the raw "out*/*.h5" files

    the result goes to "<complx>.h5", group "<complx>/mcc_total", with one
    dataset per replica named after the replica number; column 0 is mcc and
    column 1 is the total energy

    the output file is opened once in append mode, so the file and the group
    are created up front and existing content is kept; writing a replica whose
    dataset already exists is still rejected by h5py
    """
    h5_fn = complx + ".h5"
    group_path = complx + '/mcc_total'
    dir_path = os.path.abspath(complx)
    report_path = os.path.join(dir_path, 'report')
    h5_path_regx = os.path.join(dir_path, "out*", "*.h5")

    # the replica count and the temperature list are reported but not needed here
    num_prt, num_temp, num_lig, _num_rep, _temps = readReport(report_path)

    print(complx)

    # explicit "a" mode: recent h5py versions open read-only by default, which
    # would make every write below fail; the context manager guarantees the
    # file is closed even when loading or writing a replica raises
    with h5py.File(h5_fn, "a") as f:
        group = f.require_group(group_path)

        for prt_conf in range(num_prt):
            for lig_conf in range(num_lig):
                for tmp_conf in range(num_temp):
                    # flat replica index derived from the three configuration indices
                    rep_num = num_temp * num_lig * prt_conf + num_lig * tmp_conf + lig_conf
                    track = loadTrack(rep_num, h5_path_regx)
                    total_energy = np.array([each_step[1] for each_step in track])
                    mcc = np.array([each_step[2] for each_step in track])

                    mcc_ener = np.column_stack((mcc, total_energy))

                    group.create_dataset(str(rep_num), data=mcc_ener)
                    # persist each replica as soon as it is written
                    f.flush()
Esempio n. 6
0
    if intercept == 1:
        intercepted_track = interceptTrack(floor_mcc, ceil_mcc, track)
        track = intercepted_track

    ener, mcc = [], []

    for i in track:
        ener.append(i[1])
        mcc.append(i[2])
        
    if 10000 in mcc:  # drop the mcc with CMCC_INVALID_VAL
        print "CMCC_INVALID_VAL found in", rep_num
        return np.nan, np.nan
    else:
        return pearsonr(mcc, ener)
    
if __name__ == "__main__":
    rep_num = 362
    h5_path_regx = "/work/jaydy/working/docking/1a07C1/out*/*.h5"
    track = loadTrack(rep_num, h5_path_regx)

    ener, mcc = [], []

    for i in track:
        ener.append(i[1])
        mcc.append(i[2])
        
    print pearsonr(mcc, ener)