Esempio n. 1
0
    def collectSvcData(self, dc="high"):
        """
        Build the SVC data sheet from the raw decoy energy files.

        Looks up the files under ``self.raw_data_dir`` that match the
        extension selected by ``dc``, reads each one with ``pd.read_csv``,
        concatenates them, drops the ``'Unnamed: 0'`` column, sorts the rows
        by ``mcc`` in descending order and stores the result, re-indexed
        from 0, in ``self.svc_sheet``.

        Parameters
        ----------
        dc : str
            Decoy category: ``"high"``, ``"low"`` or ``"all"``. It selects
            ``self.extds[0]``, ``self.extds[1]`` or ``self.extds[2]``
            respectively.

        Raises
        ------
        ValueError
            If ``dc`` is not one of the three accepted categories.
        """
        # position of each decoy category inside self.extds
        extd_index = {"high": 0, "low": 1, "all": 2}
        if dc not in extd_index:
            # Fail right away with a clear message; carrying on would only
            # trip over an unbound `raw_data_paths` a few lines further down.
            raise ValueError("dc must be 'high', 'low' or 'all', got %r" % (dc,))

        raw_data_paths = suffix.getAbsPaths(self.raw_data_dir,
                                            self.extds[extd_index[dc]])
        suffix.checkPaths(raw_data_paths)

        sheet = pd.concat([pd.read_csv(fn) for fn in raw_data_paths],
                          ignore_index=True)

        # NOTE(review): column names come from each file's header row
        # (read_csv default). self.rep_ener_columns is deliberately not
        # applied here: assigning it to a `headers` attribute never renamed
        # anything in pandas, and the frame returned by drop() would not
        # carry that attribute anyway.
        sheet = sheet.drop(['Unnamed: 0'], axis=1)      # rule out trash columns

        # DataFrame.sort(columns=...) no longer exists in pandas;
        # sort_values(by=...) is its replacement (pandas >= 0.17).
        sheet = sheet.sort_values(by='mcc', ascending=False)

        # renumber the rows 0..n-1 in the sorted order
        self.svc_sheet = sheet.reset_index(drop=True)
Esempio n. 2
0
def main(rootdir, extd, output_file = 'svm_in'):
    """
    Merge several xxx-DecoyRepEner.txt files into one libsvm input file.

    The input paths are obtained from suffix.getAbsPaths(rootdir, extd)
    (presumably the files under `rootdir` whose names end with `extd` --
    confirm against the suffix module) and pretty-printed before being
    passed to getConcatedData.  The resulting table is written to
    `output_file` separated by single spaces, with neither the index
    column nor a header row.
    """
    input_paths = suffix.getAbsPaths(rootdir, extd)
    pprint.pprint(input_paths)      # show which files are about to be merged

    merged = getConcatedData(input_paths)
    merged.to_csv(
        output_file,
        header=False,
        index=False,
        sep=' ',
    )
Esempio n. 3
0
def main(rootdir, extd, output_file = 'svm_in'):
    """
    Merge several xxx-DecoyRepEner.txt files into one libsvm input file.

    The input paths are obtained from suffix.getAbsPaths(rootdir, extd)
    (presumably the files under `rootdir` whose names end with `extd` --
    confirm against the suffix module).  They are combined by Concat,
    passed through reFormat, and the result is written to `output_file`
    separated by single spaces, with neither the index column nor a
    header row.
    """
    merged = Concat(suffix.getAbsPaths(rootdir, extd))
    svm_table = reFormat(merged)
    svm_table.to_csv(
        output_file,
        header=False,
        index=False,
        sep=' ',
    )
Esempio n. 4
0
 def __init__(self, ff='-0.4.ff', dc='high', svc='svc', svr='svr', data_base_dir='/work/jaydy/dat/output/output/FF_opt/', devs=3):
     """
     Store the run settings and locate the raw decoy energy files.

     ff            -- file tag such as '-0.4.ff'; everything after its first
                      character is appended to `data_base_dir` to form the
                      raw input data directory
     dc            -- decoy category: 'high', 'low' or 'all'
     svc, svr      -- names, stored unchanged
     data_base_dir -- prefix of the raw data directory; it is concatenated
                      as a plain string, so it has to end with the path
                      separator
     devs          -- stored unchanged
     """
     ext_tail = '-DecoyRepEner.txt'      # common ending of every raw data file

     # values kept exactly as passed in
     self.ff = ff
     self.dc = dc    # high, low, all
     self.svc = svc      # name
     self.svr = svr      # name
     self.devs = devs

     # where the raw input lives and which files belong to this decoy category
     raw_dir = data_base_dir + ff[1:]
     self.raw_data_dir = raw_dir
     self.extd = '-' + dc + ext_tail
     self.extds = ['-' + level + ext_tail for level in ('high', 'low', 'all')]
     self.raw_data_paths = suffix.getAbsPaths(raw_dir, self.extd)

     self.concated_fn = dc + '-raw'  # name of the concatenated file
     self.rep_ener_columns = [
         '_evdw', '_eele', '_epmf', '_ehpc', '_ehdb',
         '_edst', '_epsp', '_ekde', '_elhm', 'mcc',
     ]

     # empty placeholders for the two data sheets
     self.svc_sheet = pd.DataFrame()
     self.svr_sheet = pd.DataFrame()