def create_newdata(header, x):
    """Drop the attributes ranked at or past ``keepcnt`` and rebuild derived data.

    ``table`` is walked in order. Every entry whose position is ``keepcnt``
    or later is recorded in ``lost`` (attribute name -> ``iteration + 1``)
    and its column is removed from both ``header`` and ``x``. Variables,
    attribute info and the distance array are then recomputed for the
    reduced data set.

    Names taken from the enclosing scope: ``V`` (verbose flag), ``options``,
    ``table``, ``keepcnt``, ``lost``, ``iteration``, ``y``, ``cmn``, ``np``,
    ``tm`` and ``sys``.
    NOTE(review): ``table`` is presumably sorted best-score-first so that the
    tail holds the weakest attributes -- confirm against the caller.

    Args:
        header: list of attribute names; must support ``.index``.
        x: 2-D data array whose columns line up with ``header``.

    Returns:
        Tuple ``(header, x, attr, var, distArray, lost)`` where ``header`` and
        ``x`` are the reduced versions, ``attr``/``var`` come from the ``cmn``
        survey helpers, ``distArray`` is the recomputed distance array and
        ``lost`` is the (mutated in place) enclosing-scope mapping.

    Raises:
        ValueError: from ``header.index`` if a name in ``table`` is not
            present in ``header``.
    """
    if V:
        print('Reducing attributes by ' + str(options['turfpct']) + '%')
        sys.stdout.flush()

    # Column positions of every attribute that falls outside the keep window.
    dlist = []
    for rank, entry in enumerate(table):
        if rank >= keepcnt:
            name = entry[0]
            lost[name] = iteration + 1
            dlist.append(header.index(name))

    header = np.delete(header, dlist).tolist()  # remove orphans from header
    x = np.delete(x, dlist, axis=1)  # remove orphaned attributes from data
    x = np.ascontiguousarray(x, dtype=np.double)

    if V:
        print('Getting new variables, attributes and distance array')
        sys.stdout.flush()

    # Re-survey the reduced data set.
    var = cmn.getVariables(header, x, y, options)
    attr = cmn.getAttributeInfo(header, x, var, options)

    if V:
        print("--------------- Parameters ---------------")
        print("datatype: " + var['dataType'])
        print("attributes: " + str(var['NumAttributes']))
        if var['dataType'] == 'mixed':
            print(" continuous: " + str(var['cpct'][1]))
            print(" discrete: " + str(var['dpct'][1]))
        if var['mdcnt'] > 0:
            print("missing: " + str(var['mdcnt']))
        print("--------------------------------------------")
        sys.stdout.flush()

    begin = tm.time()
    diffs, cidx, didx = cmn.dtypeArray(header, attr, var)
    if var['mdcnt'] > 0:
        # Missing data present: use the dedicated distance module, imported
        # lazily so it is only loaded when actually needed.
        import mmDistance as md
        distArray = md.getDistances(x[:, cidx], x[:, didx], var, diffs[cidx])
        disttype = "missing"
    else:
        distArray = cmn.getDistances(x, attr, var, cidx, didx)
        disttype = "discrete/continuous/mixed"

    if V:
        print(disttype + " distance array elapsed time(sec) = " + str(tm.time() - begin))
        sys.stdout.flush()

    return header, x, attr, var, distArray, lost
if (var['classType'] == 'multiclass'): yset = var['phenoTypeList'] print(" classes: " + str(len(yset))) print("classname: " + var['phenoTypeName']) print("algorithm: " + options['algorithm']) print("--------------------------------------------") sys.stdout.flush() #-----------------------------------------------------------------------------# # create distance array and remove intermediate data # if missing and/or mixed data use the mixedDistance function # begin = tm.time() diffs, cidx, didx = cmn.dtypeArray(header, attr, var) if (var['mdcnt'] > 0): import mmDistance as md distArray = md.getDistances(x[:, cidx], x[:, didx], var, diffs[cidx]) disttype = "missing" else: distArray = cmn.getDistances(x, attr, var, cidx, didx, cheader) disttype = "discrete/continuous/mixed" if (V): ctime = "[" + tm.strftime("%H:%M:%S") + "]" print(ctime + " " + disttype + " distance array time(sec) = " + str(tm.time() - begin)) sys.stdout.flush() ############################################################################# ################################################################################################################################################### def test_relieff_Multiplexer(): """ Test ReliefF on 6-bit Multiplexer"""
yset = var['phenoTypeList'] print(" classes: " + str(len(yset))) print("datatype: " + var['dataType']) print("classname: " + var['phenoTypeName']) print("algorithm: " + options['algorithm']) print("--------------------------------------------") sys.stdout.flush() #-----------------------------------------------------------------------------# # create distance array and remove intermediate data # if missing and/or mixed data use the mixedDistance function # begin = tm.time() diffs, cidx, didx = cmn.dtypeArray(header, attr, var) if(var['mdcnt'] > 0): import mmDistance as md distArray = md.getDistances(x[:,cidx], x[:,didx], var, diffs[cidx]) disttype = "missing" else: distArray = cmn.getDistances(x, attr, var, cidx, didx, cheader) disttype = "discrete/continuous/mixed" if(V): ctime = "[" + tm.strftime("%H:%M:%S") + "]" print(ctime + " " + disttype + " distance array time(sec) = " + str(tm.time()-begin)) sys.stdout.flush() #-----------------------------------------------------------------------------# # get Scores based on algorithm selected (-a and -t) # if(turfpct > 0): # Use TURF import Turf as T
def create_newdata(header, x):
    """Trim the lowest-ranked features and recompute everything derived from the data.

    Entries of ``table`` at position ``keepcnt`` and beyond are logged in
    ``lost`` with the value ``iteration + 1`` and their columns are deleted
    from ``header`` and ``x``. The data survey and the distance array are
    then regenerated for the surviving features.

    Uses ``V``, ``options``, ``table``, ``keepcnt``, ``lost``, ``iteration``,
    ``y``, ``cmn``, ``np``, ``tm`` and ``sys`` from the enclosing scope.

    Args:
        header: list of feature names (needs ``.index``).
        x: 2-D data array with one column per entry of ``header``.

    Returns:
        ``(header, x, attr, var, distArray, lost)`` for the reduced data set.
    """
    if V:
        print('Reducing attributes by ' + str(options['turfpct']) + '%')
        sys.stdout.flush()

    # Table holds features sorted by decreasing score; once the keep window
    # is exhausted every remaining feature is marked as lost and queued for
    # removal.
    doomed_columns = []
    for position, row in enumerate(table):
        if position >= keepcnt:
            feature = row[0]
            lost[feature] = iteration + 1
            doomed_columns.append(header.index(feature))

    # Shrink header and data together so their columns stay aligned.
    header = np.delete(header, doomed_columns).tolist()
    x = np.ascontiguousarray(
        np.delete(x, doomed_columns, axis=1), dtype=np.double
    )

    if V:
        print('Getting new variables, attributes and distance array')
        sys.stdout.flush()

    # Redo the data survey on the reduced data set.
    var = cmn.getVariables(header, x, y, options)
    attr = cmn.getAttributeInfo(header, x, var, options)
    cheader = [name for name in header if attr[name][0] == 'continuous']

    if V:
        print("--------------- Parameters ---------------")
        print("datatype: " + var['dataType'])
        print("attributes: " + str(var['NumAttributes']))
        if var['dataType'] == 'mixed':
            print(" continuous: " + str(var['cpct'][1]))
            print(" discrete: " + str(var['dpct'][1]))
        if var['mdcnt'] > 0:
            print("missing: " + str(var['mdcnt']))
        print("--------------------------------------------")
        sys.stdout.flush()

    started = tm.time()
    diffs, cidx, didx = cmn.dtypeArray(header, attr, var)

    # Pick the distance routine from the presence of missing data.
    if var['mdcnt'] > 0:
        import mmDistance as md
        disttype = "missing"
        distArray = md.getDistances(x[:, cidx], x[:, didx], var, diffs[cidx])
    else:
        disttype = "discrete/continuous/mixed"
        distArray = cmn.getDistances(x, attr, var, cidx, didx, cheader)

    if V:
        elapsed = tm.time() - started
        print(disttype + " distance array elapsed time(sec) = " + str(elapsed))
        sys.stdout.flush()

    return header, x, attr, var, distArray, lost