def WMulti(files, file, testSet, fout, features, name, clfName, dp, convertToBinary=True):
    """Train on earlier same-prefix files and evaluate the model on ``testSet``.

    A file from ``files`` is used for training when its first three
    characters equal those of ``file`` and it sorts strictly before ``file``
    (plain string comparison). When at least one such file exists, they are
    loaded with ``DPLIB.LoadCSV``, the classifier obtained from
    ``GLOB(clfName).getClassifier()`` is built on them and evaluated on
    ``testSet``; the measures and AUC are printed and appended to ``fout``.
    When no file qualifies, a ``!!!`` placeholder line is printed and
    written instead.

    Args:
        files: Iterable of candidate file names.
        file: Name of the file under test; also used in the report lines.
        testSet: Test data; its last column (``testSet[:, -1]``) is compared
            against the predictions.
        fout: Object with a ``write(str)`` method that receives the report.
        features: Passed through to ``DPLIB.LoadCSV``.
        name: Label that prefixes every report line.
        clfName: Classifier name handed to ``GLOB``.
        dp: Passed through to ``DPLIB.LoadCSV``.
        convertToBinary: Passed through to ``DPLIB.LoadCSV``.

    Returns:
        None.
    """
    prefix = file[0:3]
    # The original condition was ``file2 < file < 0``; comparing a string
    # with 0 raises TypeError on Python 3 (and is always false on Python 2),
    # so no training file could ever be selected. Only the ordering between
    # the two names is meant.
    train = [other for other in files if other[0:3] == prefix and other < file]

    if not train:
        print(name + ":" + file + ": " + "!!!" + " AUC = !!!")
        fout.write("\n" + name + ":" + file + ": !!!")
        return

    trainSet = DPLIB.LoadCSV(train, dp, features, convertToBinary)

    # NOTE(review): names containing "infogain" previously entered a branch
    # holding only commented-out feature-selection code (a no-op). Feature
    # selection is therefore NOT applied here regardless of ``name``.

    clf = GLOB(clfName).getClassifier()
    clf.buildClassifier(trainSet)
    vec = clf.evaluateModel(testSet)

    actual = testSet[:, -1]
    tvals = DPLIB.getConfMatrix(actual, vec)
    auc = DPLIB.getAUC(actual, vec)
    vals = DPLIB.getMeasures(tvals)

    print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))
    fout.write("\n" + name + ":" + file + ": " + " AUC = " + str(auc) + ";" + "Vals=" + str(vals))
def run(self):
    """Run the configured GIS or LSH experiment for ``self.file``.

    Loads the training and test sets with ``DPLIB.LoadCSV``, applies the
    feature handling named by ``self.pars['features']`` (skipped when
    ``self.isCount`` is true), then, for every learner name in
    ``self.lrnrs``, repeats the experiment ``self.iters`` times
    (``self.iters`` must be an integer).

    Any exception raised while the experiment runs is printed together with
    its traceback and is not re-raised.

    Returns:
        The GIS object when ``self.expType == 'GIS'``, the LSH object when
        ``self.expType == 'LSH'``, and ``None`` for any other ``expType`` or
        when the object could not be constructed.
    """
    lrnrnames = self.lrnrs

    # Bound before the try block so the final return never hits an unbound
    # name when construction itself fails.
    gis = None
    lsh = None

    try:
        if self.expType == 'GIS':
            if self.isKS:
                gis = GISKS2(self.pars, self.file)
            else:
                gis = GIS(self.pars)
            # The original stored the object only on self.gis and then read an
            # undefined local ``gis``; keep both in sync.
            self.gis = gis
        elif self.expType == 'LSH':
            lsh = CPDP_LSH_Binary(self.pars, self.file)

        trainSetAll = DPLIB.LoadCSV(self.train, self.dp, self.features,
                                    convertToBinary=not self.isCount)
        testSetAll = DPLIB.LoadCSV(self.test, self.dp, self.features,
                                   convertToBinary=not self.isCount)

        ft = 'A'
        indi = None
        iterativeIG = False
        # NOTE(review): the source formatting was collapsed; all three feature
        # modes are assumed to sit under the ``not self.isCount`` guard.
        if not self.isCount:
            featureMode = self.pars['features']
            if featureMode == 'Iterative InfoGain Subsetting':
                iterativeIG = True
                ft = 'IG'
                indi = DPLIB.fSelectInfoGain(trainSetAll)
            elif featureMode == 'All':
                print('All')
            elif featureMode == 'PCA':
                ft = 'PCA'
                print('PCA')
                trainSetAll, testSetAll = DPLIB.applyPCA(trainSetAll, testSetAll, 0.95)

        for clfName in lrnrnames:
            lrnr = "-" + clfName

            # Select features per learner from the full sets. The original
            # overwrote trainSetAll/testSetAll here, so every learner after
            # the first was subset from already-reduced data using ``indi``
            # computed on the full set.
            trainSet, testSet = trainSetAll, testSetAll
            if iterativeIG:
                indis2 = DPLIB.iterativeInfoGainSubsetting(trainSetAll, indi, clfName)
                trainSet = DPLIB.fSelectSet(trainSetAll, indis2)
                testSet = DPLIB.fSelectSet(testSetAll, indis2)

            # NOTE(review): assumed to apply regardless of ``self.isCount``.
            vSets = None
            if self.pars['vSetType'] in ['Single Random', 'Multiple Random']:
                vSets = DatasetUtils.getRandomDatasets()

            for c in range(self.iters):
                print("Start:" + self.file + ": " + str(c))
                print("====================================================")

                if self.expType == 'GIS':
                    # NOTE(review): ``fout`` is not defined in this method (its
                    # open/close calls were commented out); it must resolve at
                    # module scope or this call raises NameError. Left as-is
                    # because the intended value cannot be determined here.
                    self.doGIS(trainSet, testSet, "FIXED-VMUL-GEN-" + ft, lrnr,
                               fout, vSets, False, clfName, gis=gis)
                    gis.prnt('---------------------------------------\n')
                elif self.expType == 'LSH':
                    lsh.CreateBucketsTune(
                        trainSet, testSet, vSets,
                        name="LSHTune-ALL-TOP-SUPER" + lrnr,
                        testCut=self.pars['tunecut'],
                        iternum=c,
                        save=False,
                        superbit=self.pars['lshType'] == 'SuperBit',
                        clfName=clfName,
                        tunelrn=self.pars['tunelrnr'])
                    lsh.prnt('---------------------------------------\n')

        print("File Processing Ended:" + self.file)
    except Exception as e:
        # Best-effort reporting: a failure while printing must not mask the
        # fact that the run already failed.
        try:
            print(str(e))
            print(traceback.format_exc())
        except Exception as ex2:
            print("X2", str(ex2))
            print(traceback.format_exc())

    if self.expType == 'GIS':
        return gis
    elif self.expType == 'LSH':
        return lsh