def RandomForest_ALL(self):
    """Create the random-forest work directories and run rounds 1..100.

    Ensures ``self.RF_dir`` and its ``random1`` .. ``random100``
    sub-directories exist, calls ``self.ReadCombine()`` once, then maps
    ``self.RandomSampling`` and afterwards ``self.RandomForest`` over the
    round numbers 1..100 using a ``MyPool`` of ``self.thred_num`` workers.
    """
    rounds = range(1, 101)

    # Base directory first, then one sub-directory per round.
    wanted_dirs = [self.RF_dir]
    wanted_dirs.extend(
        "{0}/random{1}".format(self.RF_dir, number) for number in rounds
    )
    for directory in wanted_dirs:
        if not os.path.exists(directory):
            os.makedirs(directory)

    self.ReadCombine()

    # The same pool serves both passes; sampling is submitted before the
    # forest step (presumably map() blocks until all rounds are done --
    # confirm against MyPool).
    pool = MyPool(self.thred_num)
    pool.map(self.RandomSampling, rounds)
    pool.map(self.RandomForest, rounds)
def DataShrink(self, vec_time):
    """Write down-sampled copies of the five ``training_N`` files.

    For each index 1..5 the file ``training_N`` is split by
    ``self.DataDivide`` into a linker list and a domain list.  The domain
    list is shuffled and truncated to ``vec_time * len(linker list)``
    entries, and linker + kept domain entries are written, one
    space-joined entry per line, to ``training_sampled_<vec_time>_<N>``
    in the same directory.

    Args:
        vec_time: multiplier applied to the number of linker entries to
            get the number of domain entries kept (used as a slice bound,
            so it is presumably an int -- confirm against callers).

    Exits the process with status 1 when a file has fewer domain entries
    than requested.
    """
    import sys  # local import: only needed for the failure path below

    # Anchor on work_dir so this reads the training_N files from the same
    # location MakeVectorDataForCross writes them to, independent of the
    # current working directory.
    output_dir = "{0}/SVM/Training/{1}".format(self.work_dir, self.id)

    def func(i):
        path = "{0}/training_{1}".format(output_dir, i)
        train_linker, train_domain = self.DataDivide(path)
        # Shuffle so the kept prefix is a random subset of the domain entries.
        random.shuffle(train_domain)
        vec_num = vec_time * len(train_linker)
        if vec_num > len(train_domain):
            print("Vector number is too large to decrease!")
            # NOTE(review): if MyPool runs func in worker processes this
            # ends only that worker -- confirm how the parent reacts.
            sys.exit(1)
        train_sampled = train_linker + train_domain[:vec_num]
        filename = "{0}/training_sampled_{1}_{2}".format(output_dir, vec_time, i)
        with open(filename, "w") as fp:
            fp.writelines(" ".join(x) + "\n" for x in train_sampled)

    p = MyPool(self.thred_num)
    p.map(func, range(1, 6))
def MakeVectorDataForCross(self):
    """Build the (linker list, output file) tasks and run MakeVectorData.

    Creates ``<work_dir>/SVM/Training/<id>`` and ``<work_dir>/SVM/Test/<id>``
    when missing, queues one task per index 1..5 for each of them, adds a
    final task that turns ``svmcrosslist`` into ``test_all``, and maps
    ``self.MakeVectorData`` over the task tuples with a ``MyPool`` of
    ``self.thred_num`` workers.
    """
    # (sub-directory, linker-list template, output-file template)
    # NOTE(review): the training lists are "Crosslist_N" and the test lists
    # "crosslist_N" -- they differ only in letter case; confirm intended.
    layout = (
        ("Training", "{0}/Crosslist_{1}", "{0}/training_{1}"),
        ("Test", "{0}/crosslist_{1}", "{0}/test_{1}"),
    )

    jobs = []
    for subset, linker_tmpl, output_tmpl in layout:
        subset_dir = "{0}/SVM/{1}/{2}".format(self.work_dir, subset, self.id)
        if not os.path.isdir(subset_dir):
            os.makedirs(subset_dir)
        for index in range(1, 6):
            source = linker_tmpl.format(self.linker_dir, index)
            target = output_tmpl.format(subset_dir, index)
            jobs.append((source, target))

    # One extra task covering the full list.
    jobs.append((
        "{0}/svmcrosslist".format(self.linker_dir),
        "{0}/SVM/Test/{1}/test_all".format(self.work_dir, self.id),
    ))

    pool = MyPool(self.thred_num)
    pool.map(self.MakeVectorData, jobs)
def SVM_Multi(self):
    """Run SVMLearn and SVMClassify for indices 1..5, then merge outputs.

    Maps ``self.SVMLearn`` and afterwards ``self.SVMClassify`` over 1..5
    with a ``MyPool`` of ``self.thred_num`` workers.  Every file in
    ``self.output_dir`` whose name is ``output_`` plus exactly one
    character is then concatenated, in sorted name order, into
    ``<output_dir>/output_sw1`` (presumably these are the per-index
    classifier outputs -- confirm against SVMClassify).

    Raises:
        IOError/OSError: if a part file or the merged file cannot be
            opened, read or written.
    """
    import glob
    import shutil

    indices = range(1, 6)
    pool = MyPool(self.thred_num)
    pool.map(self.SVMLearn, indices)
    pool.map(self.SVMClassify, indices)

    # Concatenate in-process instead of shelling out to `cat`: the path is
    # never parsed by a shell (spaces and metacharacters are safe) and I/O
    # failures surface as exceptions rather than an ignored exit status.
    # sorted() reproduces the ordering a shell glob would give.
    parts = sorted(glob.glob("{0}/output_?".format(self.output_dir)))
    merged_path = "{0}/output_sw1".format(self.output_dir)
    # Binary mode copies the bytes unchanged, as `cat` does.
    with open(merged_path, "wb") as merged:
        for part_path in parts:
            with open(part_path, "rb") as part:
                shutil.copyfileobj(part, merged)