def reJoinRepDecoy(self):
    """Rebuild the energy sheet and coordinate file of the representatives.

    For every file dictionary in ``self.file_list`` the decoy coordinates,
    MCC matrix, cluster assignment, energies and MCC list are loaded.  For
    each ``(cluster_num, cluster)`` pair yielded by ``pick_rep.reGroup`` a
    representative is obtained from ``pick_rep.pickRep``; its row of the
    combined energy/MCC sheet is appended to ``self.rep_sheet`` and its
    coordinates are appended to the file ``self.rep_dc_file``.

    Once every file dictionary has been processed, ``self.rep_sheet`` is
    sorted by its ``mcc`` column in descending order and written to
    ``self.rep_ener_file`` as CSV.
    """
    with open(self.rep_dc_file, "w") as coords_output_buffer:
        for file_dic in self.file_list:
            # Echo the file set being processed.  A single pre-formatted
            # argument prints the same text on Python 2 and Python 3.
            for key, value in file_dic.items():
                print("%s ==> %s" % (key, value))

            decoy_file = file_dic["decoy_file"]
            mcc_lst_file = file_dic["mcc_lst_file"]
            energy_file = file_dic["energy_file"]
            cluster_file = file_dic["cluster_file"]
            mcc_matrix_file = file_dic["mcc_matrix_file"]

            dc_coord = DecoyCoord(origial_text=decoy_file)
            dc_coord.setDecoyUnitLines()

            # The first line of the matrix file is skipped
            # (presumably a header -- confirm against the file format).
            mcc_matrix = np.loadtxt(mcc_matrix_file, skiprows=1)
            cluster_array = np.loadtxt(cluster_file, dtype=int)

            # Whitespace-separated, no header row.  The raw string keeps
            # the regex escape valid on every Python version.
            energy_sheet = pd.read_csv(energy_file, sep=r"\s", header=None)
            energy_sheet.columns = self.energy_columns
            mcc_lst = pd.read_csv(mcc_lst_file, names=self.mcc_name)

            # Put the energy columns and the MCC columns side by side.
            source_sheet = pd.concat([energy_sheet, mcc_lst], axis=1)

            for _cluster_num, cluster in pick_rep.reGroup(cluster_array):
                conf_rep = pick_rep.pickRep(mcc_matrix, cluster)
                # Append the representative's row to the energy sheet ...
                self.rep_sheet = self.rep_sheet.append(
                    source_sheet.ix[conf_rep], ignore_index=False
                )
                # ... and its coordinates to the representative coord file.
                dc_coord.appendRepCoords(coords_output_buffer, conf_rep)

    # Re-sort by MCC value, highest first, then persist the sheet.
    self.rep_sheet = self.rep_sheet.sort(columns="mcc", ascending=False)
    self.rep_sheet.to_csv(self.rep_ener_file)
def secondReJoinRepDecoy(self):
    """Build and save the feature sheets of the representative decoys.

    After preparing the file names (``buildSecondFn``), displaying them
    (``displaySecondFn``) and resetting the result frames
    (``initSecondRepFeatureFrame``), one representative per group returned
    by ``pick_rep.reGroup`` is chosen with ``pick_rep.pickRep`` and its
    feature row is appended to ``self.all_rep_sheet``.

    The sheet is then sorted by ``mcc`` in descending order and split into
    ``self.high_rep_sheet`` (``mcc > 0.6``) and ``self.low_rep_sheet``
    (``mcc < 0.4``).  All three sheets are written out as CSV files.
    """
    self.buildSecondFn()
    self.displaySecondFn()
    self.initSecondRepFeatureFrame()

    paths = self.final_dic

    # Input locations.
    matrix_path = paths['mcc_matrix_file']
    features_path = paths['features_file']
    clusters_path = paths['cluster_file']

    # Output locations.
    all_target = paths['rep_all_ener_file']
    low_target = paths['rep_low_ener_file']
    high_target = paths['rep_high_ener_file']

    # Load everything needed to pick the representatives.
    mcc_mat = np.loadtxt(matrix_path, skiprows=1)
    assignments = np.loadtxt(clusters_path, dtype=int)
    features = pd.read_csv(features_path)  # comma-separated, the pandas default

    for _, members in pick_rep.reGroup(assignments):
        chosen = pick_rep.pickRep(mcc_mat, members)
        chosen_row = features.ix[chosen]
        # Grow the sheet of representatives by one row.
        self.all_rep_sheet = self.all_rep_sheet.append(chosen_row, ignore_index=True)

    # Order by MCC value, best first.
    ranked = self.all_rep_sheet.sort(columns='mcc', ascending=False)
    self.all_rep_sheet = ranked

    mcc_values = ranked['mcc']
    self.high_rep_sheet = ranked[mcc_values > 0.6]
    self.low_rep_sheet = ranked[mcc_values < 0.4]

    # Write the sheets in the order: all, high, low.
    outputs = (
        (self.all_rep_sheet, all_target),
        (self.high_rep_sheet, high_target),
        (self.low_rep_sheet, low_target),
    )
    for sheet, target in outputs:
        sheet.to_csv(target)
def secondTestReJoinRepDecoy(self):
    """Build and save the feature sheets using a randomly drawn representative.

    The file names come from ``buildTestSecondFn``.  The feature table
    (whitespace-separated) is extended with an ``mcc`` column read from the
    separate MCC file.  For each ``(cluster_num, cluster)`` pair yielded by
    ``pick_rep.reGroup`` one member is drawn with ``choice`` and its feature
    row is appended to ``self.all_rep_sheet``.  The MCC matrix is not
    consulted here.

    The sheet is then sorted by ``mcc`` in descending order and split into
    ``self.high_rep_sheet`` (``mcc > 0.6``) and ``self.low_rep_sheet``
    (``mcc < 0.4``).  All three sheets are written out as CSV files.
    """
    self.buildTestSecondFn()
    self.displaySecondFn()
    self.initSecondRepFeatureFrame()

    # input
    file_dic = self.final_dic
    features_file = file_dic["features_file"]
    cluster_file = file_dic["cluster_file"]
    mcc_file = file_dic["mcc_file"]

    # output
    rep_all_ener_file = file_dic["rep_all_ener_file"]
    rep_low_ener_file = file_dic["rep_low_ener_file"]
    rep_high_ener_file = file_dic["rep_high_ener_file"]

    # loading
    cluster_array = np.loadtxt(cluster_file, dtype=int)
    # Columns are separated by runs of whitespace; the raw string keeps the
    # regex escape valid on every Python version.
    feature_sheet = pd.read_csv(features_file, sep=r"\s+")
    mcc_sheet = pd.read_csv(mcc_file, header=None)
    mcc_sheet.columns = ["mcc"]
    # Attach the MCC values as an extra column next to the features.
    feature_sheet = pd.concat([feature_sheet, mcc_sheet], axis=1)

    for _cluster_num, cluster in pick_rep.reGroup(cluster_array):
        # The drawn member is shifted by one before being used as a row
        # label (presumably cluster members are 1-based -- TODO confirm).
        conf_rep = choice(cluster) - 1
        self.all_rep_sheet = self.all_rep_sheet.append(
            feature_sheet.ix[conf_rep], ignore_index=True
        )

    # Re-sort by MCC value, highest first.
    self.all_rep_sheet = self.all_rep_sheet.sort(columns="mcc", ascending=False)
    self.high_rep_sheet = self.all_rep_sheet[self.all_rep_sheet["mcc"] > 0.6]
    self.low_rep_sheet = self.all_rep_sheet[self.all_rep_sheet["mcc"] < 0.4]

    self.all_rep_sheet.to_csv(rep_all_ener_file)
    self.high_rep_sheet.to_csv(rep_high_ener_file)
    self.low_rep_sheet.to_csv(rep_low_ener_file)