def scatter(rep_num, h5_path_regx):
    """
    scatter-plot the mcc of one replica against its energy

    the track of replica ``rep_num`` is fetched with ``loadTrack``; the mcc
    values and the energies are then handed, in that order, to ``plotScatter``
    together with the name 'linear_scatter_ener_mcc.pdf'
    """
    # single pass over the track: item 1 of a step is the energy, item 2 the mcc
    pairs = [(step[1], step[2]) for step in loadTrack(rep_num, h5_path_regx)]
    energies = [energy for energy, _ in pairs]
    mcc_values = [mcc_val for _, mcc_val in pairs]
    plotScatter(mcc_values, energies, 'linear_scatter_ener_mcc.pdf')
def pearsonrMccEner(rep_num, h5_path_regx):
    """
    load the track of one replica from the hdf5 files and return
    pearsonr(mcc, energy) evaluated over all of its steps
    """
    # NOTE(review): this file also holds a later pearsonrMccEner that takes
    # extra keyword parameters; if both live in one module the later one
    # replaces this definition -- confirm which is intended
    pairs = [(step[1], step[2]) for step in loadTrack(rep_num, h5_path_regx)]
    ener_vals = [energy for energy, _ in pairs]
    mcc_vals = [mcc_val for _, mcc_val in pairs]
    return pearsonr(mcc_vals, ener_vals)
def _trackToMatrix(track, prt_conf, lig_conf):
    """
    turn the steps of one replica into a 2-D numpy array, one row per step

    columns, in order: step, mcc, total energy, components, move vector,
    prt_conf, lig_conf
    """
    steps = list(track)  # materialise once so every field reads the same steps
    step = np.array([each_step[0] for each_step in steps])
    total_energy = np.array([each_step[1] for each_step in steps])
    mcc = np.array([each_step[2] for each_step in steps])
    move_vector = np.array([each_step[3] for each_step in steps])
    components = np.array([each_step[4] for each_step in steps])
    # one (prt_conf, lig_conf) row per step; unlike np.vstack over a generator
    # this also works for an empty track, where it yields shape (0, 2)
    confs = np.tile([prt_conf, lig_conf], (len(steps), 1))
    return np.column_stack(
        (step, mcc, total_energy, components, move_vector, confs))


def extractTrack(complx, track_path):
    """
    collect the docking track of every replica of one complex and
    store each of them as a numpy matrix in '<complx>.h5'

    The directory ``complx`` has to hold a 'report' file (read with
    ``readReport``) and the raw tracks under 'out*/*.h5' (read with
    ``readH5.loadTrack``).  For every replica one dataset, named after the
    replica number, is created inside the hdf5 group ``track_path``; its
    columns are step, mcc, total energy, components, move vector,
    prt_conf and lig_conf.

    complx     -- name of the complex: the directory that is read and, with
                  '.h5' appended, the output file (relative to the current
                  working directory)
    track_path -- path of the hdf5 group that receives the datasets

    returns nothing

    NOTE: the steps are stored exactly as ``readH5.loadTrack`` returns them,
    no de-duplication is done in this function.
    NOTE(review): a dataset for a replica number that already exists in the
    group is expected to make ``create_dataset`` fail -- confirm whether
    re-running on an existing output file has to be supported.
    """
    h5_fn = complx + ".h5"
    dir_path = os.path.abspath(complx)
    report_path = dir_path + "/report"
    h5_path_regx = dir_path + "/out*/*.h5"

    num_prt, num_temp, num_lig, _num_rep, _temps = readReport(report_path)
    print(complx)

    # explicit "a" (read/write, create if missing): recent h5py releases open
    # read-only when no mode is given.  The file is opened once for the whole
    # run and the context manager closes it even if a replica fails.
    with h5py.File(h5_fn, "a") as f:
        group = f.require_group(track_path)
        for prt_conf in range(num_prt):
            for lig_conf in range(num_lig):
                for tmp_conf in range(num_temp):
                    rep_num = (num_temp * num_lig * prt_conf
                               + num_lig * tmp_conf + lig_conf)
                    track = readH5.loadTrack(rep_num, h5_path_regx)
                    group.create_dataset(
                        str(rep_num),
                        data=_trackToMatrix(track, prt_conf, lig_conf))
                    # NOTE(review): these attributes sit on the group, not on
                    # the dataset, so each replica overwrites them and they
                    # end up describing the last replica written
                    group.attrs["prt_conf"] = prt_conf
                    group.attrs["lig_conf"] = lig_conf
                    # keep what has been written so far safe on disk
                    f.flush()
def pearsonrMccEner(rep_num, h5_path_regx, intercept=1, floor_mcc=0.35, ceil_mcc=0.65):
    """
    read the hdf5 file and calculate the pearsonr correlation coefficient
    between mcc and energy

    rep_num      -- replica number handed to ``loadTrack``
    h5_path_regx -- path pattern handed to ``loadTrack``
    intercept    -- when equal to 1 (the default) the track is first passed
                    through ``interceptTrack(floor_mcc, ceil_mcc, track)``
    floor_mcc    -- forwarded to ``interceptTrack``
    ceil_mcc     -- forwarded to ``interceptTrack``

    returns the result of ``pearsonr(mcc, ener)``, or ``(np.nan, np.nan)``
    when the track contains the CMCC_INVALID_VAL marker or holds fewer than
    two steps
    """
    invalid_mcc = 10000  # the value the original check used for CMCC_INVALID_VAL

    track = loadTrack(rep_num, h5_path_regx)
    if intercept == 1:
        track = interceptTrack(floor_mcc, ceil_mcc, track)

    # single pass over the track: item 1 of a step is the energy, item 2 the mcc
    pairs = [(step[1], step[2]) for step in track]
    ener = [energy for energy, _ in pairs]
    mcc = [mcc_val for _, mcc_val in pairs]

    if invalid_mcc in mcc:
        # drop the mcc with CMCC_INVALID_VAL
        # (single-argument call form prints the same text on Python 2 and 3)
        print("CMCC_INVALID_VAL found in " + str(rep_num))
        return np.nan, np.nan
    if len(mcc) < 2:
        # a correlation needs at least two points; the filtering above can
        # leave fewer, so report "no result" the same way as for invalid mcc
        return np.nan, np.nan
    return pearsonr(mcc, ener)
def extractMccTotalEner(complx):
    """
    extract mcc and total energy from the docking tracing
    store the data in hdf5 file

    The directory ``complx`` has to hold a 'report' file (read with
    ``readReport``) and the raw tracks under 'out*/*.h5' (read with
    ``loadTrack``).  For every replica one two-column dataset (mcc, total
    energy), named after the replica number, is created in the group
    '<complx>/mcc_total' of the file '<complx>.h5'.

    complx -- name of the complex: the directory that is read, the prefix of
              the hdf5 group and, with '.h5' appended, the output file
              (relative to the current working directory)

    returns nothing

    NOTE(review): a dataset for a replica number that already exists in the
    group is expected to make ``create_dataset`` fail -- confirm whether
    re-running on an existing output file has to be supported.
    """
    h5_fn = complx + ".h5"
    data_group = '/mcc_total'
    dir_path = os.path.abspath(complx)
    report_path = dir_path + '/report'
    h5_path_regx = dir_path + "/out*/*.h5"
    group_path = complx + data_group

    num_prt, num_temp, num_lig, _num_rep, _temps = readReport(report_path)
    print(complx)

    # explicit "a" (read/write, create if missing): recent h5py releases open
    # read-only when no mode is given.  The file is opened once for the whole
    # run and the context manager closes it even if a replica fails.
    with h5py.File(h5_fn, "a") as f:
        group = f.require_group(group_path)
        for prt_conf in range(num_prt):
            for lig_conf in range(num_lig):
                for tmp_conf in range(num_temp):
                    rep_num = (num_temp * num_lig * prt_conf
                               + num_lig * tmp_conf + lig_conf)
                    # materialise once so both columns read the same steps
                    steps = list(loadTrack(rep_num, h5_path_regx))
                    total_energy = np.array([each_step[1] for each_step in steps])
                    mcc = np.array([each_step[2] for each_step in steps])
                    mcc_ener = np.column_stack((mcc, total_energy))
                    group.create_dataset(str(rep_num), data=mcc_ener)
                    # keep what has been written so far safe on disk
                    f.flush()
# NOTE(review): the statements on the line below that precede
# `if __name__ == "__main__":` (the `intercept` filtering, the CMCC_INVALID_VAL
# check and the two `return`s) have no enclosing `def` in view.  They repeat the
# tail of pearsonrMccEner(rep_num, h5_path_regx, intercept, floor_mcc, ceil_mcc)
# -- presumably a leftover fragment; confirm and either remove it or re-attach
# it to its function.
#
# Script entry point (the `if __name__ == "__main__":` part): loads the track of
# replica 362 from the hard-coded pattern
# "/work/jaydy/working/docking/1a07C1/out*/*.h5", collects item 1 of every step
# as energy and item 2 as mcc, and prints pearsonr(mcc, ener).
if intercept == 1: intercepted_track = interceptTrack(floor_mcc, ceil_mcc, track) track = intercepted_track ener, mcc = [], [] for i in track: ener.append(i[1]) mcc.append(i[2]) if 10000 in mcc: # drop the mcc with CMCC_INVALID_VAL print "CMCC_INVALID_VAL found in", rep_num return np.nan, np.nan else: return pearsonr(mcc, ener) if __name__ == "__main__": rep_num = 362 h5_path_regx = "/work/jaydy/working/docking/1a07C1/out*/*.h5" track = loadTrack(rep_num, h5_path_regx) ener, mcc = [], [] for i in track: ener.append(i[1]) mcc.append(i[2]) print pearsonr(mcc, ener)