def collectSvcData(self, dc="high"):
    """
    Build the data sheet used for SVC and store it in ``self.svc_sheet``.

    The file paths are obtained from
    ``suffix.getAbsPaths(self.raw_data_dir, <extension>)``, where the
    extension is the entry of ``self.extds`` selected by ``dc``.  Every file
    is read with ``pd.read_csv`` and the frames are concatenated.  The
    'Unnamed: 0' column is dropped, the rows are sorted by 'mcc' in
    descending order and the index is renumbered from 0.

    Parameters
    ----------
    dc : str
        Decoy class: "high" (``self.extds[0]``), "low" (``self.extds[1]``)
        or "all" (``self.extds[2]``).

    Returns
    -------
    None
        The result is written to ``self.svc_sheet``.

    Raises
    ------
    ValueError
        If ``dc`` is not "high", "low" or "all".
    """
    # Position of each accepted decoy class inside self.extds.
    extd_index = {"high": 0, "low": 1, "all": 2}
    if dc not in extd_index:
        # Previously this case only printed a message and then crashed with
        # an unbound-variable error a line later; fail here, explicitly.
        raise ValueError('dc must be "high", "low" or "all", got %r' % (dc,))

    raw_data_paths = suffix.getAbsPaths(self.raw_data_dir,
                                        self.extds[extd_index[dc]])
    suffix.checkPaths(raw_data_paths)

    sheet = pd.concat([pd.read_csv(fn) for fn in raw_data_paths],
                      ignore_index=True)

    # NOTE: the former ``sheet.headers = self.rep_ener_columns`` assignment
    # was removed: it only set an attribute on a frame that the next
    # statement replaced, so it never changed the column names.

    # Rule out the trash column (presumably an index column saved into the
    # input files -- confirm against the files' producer).
    sheet = sheet.drop(['Unnamed: 0'], axis=1)

    # DataFrame.sort(columns=...) no longer exists in current pandas; keep
    # it only as a fallback for installations that predate sort_values.
    if hasattr(sheet, 'sort_values'):
        sheet = sheet.sort_values(by='mcc', ascending=False)
    else:
        sheet = sheet.sort(columns='mcc', ascending=False)

    # Renumber the rows 0..n-1 in their sorted order.
    self.svc_sheet = sheet.reset_index(drop=True)
def main(rootdir, extd, output_file='svm_in'):
    """
    Concatenate a set of data files and write the result to one text file.

    The input paths come from ``suffix.getAbsPaths(rootdir, extd)`` and are
    pretty-printed before being handed to ``getConcatedData``.  The returned
    table is written to ``output_file`` with a single space as separator and
    without index or header row (per the original author's note, the format
    libsvm accepts for xxx-DecoyRepEner.txt data).

    Parameters
    ----------
    rootdir : passed to ``suffix.getAbsPaths`` as the first argument.
    extd : passed to ``suffix.getAbsPaths`` as the second argument.
    output_file : destination given to ``to_csv`` (default 'svm_in').
    """
    input_paths = suffix.getAbsPaths(rootdir, extd)
    pprint.pprint(input_paths)  # show which files are about to be read

    csv_options = dict(sep=' ', index=False, header=False)
    getConcatedData(input_paths).to_csv(output_file, **csv_options)
def main(rootdir, extd, output_file='svm_in'):
    """
    Concatenate a set of data files, reformat them and write one text file.

    The input paths come from ``suffix.getAbsPaths(rootdir, extd)``; they
    are combined by ``Concat`` and the result is passed through
    ``reFormat``.  The reformatted table is written to ``output_file`` with
    a single space as separator and without index or header row (per the
    original author's note, the format libsvm accepts for
    xxx-DecoyRepEner.txt data).

    Parameters
    ----------
    rootdir : passed to ``suffix.getAbsPaths`` as the first argument.
    extd : passed to ``suffix.getAbsPaths`` as the second argument.
    output_file : destination given to ``to_csv`` (default 'svm_in').
    """
    input_paths = suffix.getAbsPaths(rootdir, extd)
    formatted = reFormat(Concat(input_paths))

    csv_options = dict(sep=' ', index=False, header=False)
    formatted.to_csv(output_file, **csv_options)
def __init__(self, ff='-0.4.ff', dc='high', svc='svc', svr='svr',
             data_base_dir='/work/jaydy/dat/output/output/FF_opt/',
             devs=3):
    """
    Store the configuration and locate the raw input files.

    Parameters
    ----------
    ff : str
        Name whose first character is stripped to form the sub-directory
        of ``data_base_dir`` holding the raw data (default '-0.4.ff'
        gives '0.4.ff').
    dc : str
        Decoy class used to build ``self.extd`` and ``self.concated_fn``;
        the original comment lists high, low, all.
    svc, svr : str
        Names stored on the instance as ``self.svc`` / ``self.svr``.
    data_base_dir : str
        Directory containing the per-``ff`` data directories.  A trailing
        path separator is optional.
    devs : stored unchanged as ``self.devs`` (default 3).
    """
    # Local import so this block does not depend on the file's import list.
    import os

    self.ff = ff
    # Join instead of concatenating strings, so a data_base_dir given
    # without a trailing separator still yields the intended directory.
    # ff[1:] drops the first character of ff (the '-' in the default).
    self.raw_data_dir = os.path.join(data_base_dir, ff[1:])  # raw input data dir
    self.dc = dc  # high, low, all
    self.extd = '-' + dc + '-DecoyRepEner.txt'  # extend name
    # File-name extensions in the fixed order high, low, all.
    self.extds = ['-high-DecoyRepEner.txt',
                  '-low-DecoyRepEner.txt',
                  '-all-DecoyRepEner.txt']
    # Paths of the raw input data files.
    self.raw_data_paths = suffix.getAbsPaths(self.raw_data_dir, self.extd)
    self.concated_fn = dc + '-raw'  # name of the concated file
    self.rep_ener_columns = ['_evdw', '_eele', '_epmf', '_ehpc', '_ehdb',
                             '_edst', '_epsp', '_ekde', '_elhm', 'mcc']
    self.svc = svc  # name
    self.svr = svr  # name
    # Empty placeholders, created here so the attributes always exist.
    self.svc_sheet = pd.DataFrame()
    self.svr_sheet = pd.DataFrame()
    self.devs = devs