def mapreduce(self, sample_name, output_name):
    """Run the EL-SVM MapReduce job and return the local model path.

    Assembles the extra command-line arguments (``v``, ``num``, the weight
    matrix file name and whatever ``self.get_commonlib()`` returns), starts
    ``self.exe_elsvm`` through ``mapreduce_routine`` and, when running on
    Hadoop, copies the job output to the local model file with
    ``cat_routine``.

    Args:
        sample_name: input name, joined onto ``self.data_path``.
        output_name: output name, joined onto ``self.output_path`` for the
            job and onto ``self.local_path`` for the local model file.

    Returns:
        The path stored in ``self.model_args``.
    """
    # STEP1: collect the argument fragments, then start the job
    arg_parts = [
        " -param v='%s' " % self.v,
        " -param num='%s' " % self.num,
    ]
    if not self.is_hadoop:
        # Local run: pass the weight matrix path through unchanged.
        arg_parts.append(
            " -param w_matrix_filename='%s' " % self.w_matrix_filename)
    else:
        # Hadoop run: hand the file over with -file and refer to it by
        # its base name only.
        arg_parts.append(" -file " + self.w_matrix_filename)
        arg_parts.append(
            " -param w_matrix_filename='%s' "
            % os.path.basename(self.w_matrix_filename))
    arg_parts.append(self.get_commonlib())
    access_args = "".join(arg_parts)

    job_input = os.path.join(self.data_path, sample_name)
    job_output = os.path.join(self.output_path, output_name)
    mapreduce_routine(
        is_hadoop=self.is_hadoop,
        exe_program=self.exe_elsvm,
        input_file=job_input,
        output_file=job_output,
        access_args=access_args,
        content="EL-SVM MapReduce Process",
    )

    # STEP2: record the local model path and dump the job output into it
    self.model_args = os.path.join(self.local_path, output_name)
    if self.is_hadoop:
        cat_routine(input_file=job_output, output_file=self.model_args)
    return self.model_args
def test(self, models_name, input_name, output_name):
    """Run the verification MapReduce job and show its result.

    Starts ``self.exe_test`` through ``mapreduce_routine``; when running on
    Hadoop the job output is then copied to ``self.final_result_filename``
    with ``cat_routine``. Ends by calling ``self.show_result()``.

    Args:
        models_name: path of the models file; its base name is passed to
            the job as the ``models_filename`` parameter.
        input_name: input name, joined onto ``self.data_path``.
        output_name: output name, joined onto ``self.output_path``.
    """
    # STEP1: collect the argument fragments, then start the job
    arg_parts = [" -overwrite yes "]
    if self.is_hadoop:
        # Only the Hadoop run hands the models file over with -file.
        arg_parts.append(" -file " + models_name)
    # NOTE(review): the base name is used in both modes -- confirm that a
    # local run resolves the models file from the working directory.
    arg_parts.append(
        " -param models_filename='%s' " % os.path.basename(models_name))
    arg_parts.append(self.get_commonlib())
    access_args = "".join(arg_parts)

    job_input = os.path.join(self.data_path, input_name)
    job_output = os.path.join(self.output_path, output_name)
    mapreduce_routine(
        is_hadoop=self.is_hadoop,
        exe_program=self.exe_test,
        input_file=job_input,
        output_file=job_output,
        access_args=access_args,
        content="Varify EL-SVL MapReduce Process",
    )

    # STEP2: dump the result
    if self.is_hadoop:
        cat_routine(input_file=job_output,
                    output_file=self.final_result_filename)

    # STEP3: output result
    self.show_result()
def mapreduce_core(self, sample_name, output_name, exe_file=None,
                   is_cat=True, args=None, input_path=None,
                   output_path=None):
    """Run an arbitrary MapReduce program and optionally dump its output.

    Args:
        sample_name: input name, joined onto the input directory.
        output_name: output name, joined onto the output directory for the
            job and onto ``self.local_path`` for the local dump.
        exe_file: program handed to ``mapreduce_routine`` as
            ``exe_program``.
        is_cat: when false, return right after the job without touching
            ``self.model_args``.
        args: extra arguments, passed through unchanged as ``access_args``.
        input_path: input directory; ``self.data_path`` when None.
        output_path: output directory; ``self.output_path`` when None.

    Returns:
        The local path stored in ``self.model_args``, or None when
        ``is_cat`` is false.
    """
    # STEP1: dumbo main start
    input_path = self.data_path if input_path is None else input_path
    output_path = self.output_path if output_path is None else output_path
    job_output = os.path.join(output_path, output_name)
    mapreduce_routine(is_hadoop=self.is_hadoop,
                      exe_program=exe_file,
                      input_file=os.path.join(input_path, sample_name),
                      output_file=job_output,
                      access_args=args,
                      content="EL-SVM MapReduce Process")

    # STEP2: dump output
    if not is_cat:
        return
    self.model_args = os.path.join(self.local_path, output_name)
    if self.is_hadoop:
        # Read back from the location the job actually wrote to. The
        # previous code always used self.output_path here, which pointed
        # at a different location whenever the caller overrode output_path.
        cat_routine(input_file=job_output, output_file=self.model_args)
    return self.model_args