# Result accumulators; nothing in this section fills them.
blakeOutput = []
tracyOutput = []

# Data with feature selection
subSetData = []
subSetDataHeader = ['cpu', 'mem', 'project', 'ru_wallclock', 'io']

# List of symbolic columns
# (index 2 lines up with 'project' in subSetDataHeader -- presumably the
# positions refer to the reduced rows built below; confirm against
# util.convertSymbolic)
symCols = [2]

# Cluster count
clustCounts = [5, 6]  # alternative range kept for reference: [3, 4, 5, 6, 7, 8]

data, head = util.readData(acctData, False)

# Resolve every column position once, up front.  The lookups do not depend on
# the row being processed, and deriving them from subSetDataHeader keeps the
# header and the extracted values in the same order by construction.
# NOTE: a missing column now raises ValueError here, before any row is read.
failedIndex = head.index('failed')
selectedIndexes = [head.index(name) for name in subSetDataHeader]
cpuIndex, memIndex, projIndex, clockIndex, ioIndex = selectedIndexes

# Keep only the rows whose 'failed' value parses to 0, reduced to the selected
# columns and stringified.
for idx, item in enumerate(util.column(data, failedIndex)):
    if int(item) == 0:
        row = data[idx]
        subSetData.append([str(row[i]) for i in selectedIndexes])

# NOTE(review): the original layout was lost, so this call is assumed to run
# once after the loop rather than per row -- confirm.
newData, keyDict = util.convertSymbolic(subSetData, symCols, True)