def get(self, input_source):
    """Return the partitioned streaming payload of the first source entry.

    Reads ``input_source[0]['streaming_data']``, forwards its string form
    to ``debug_print_by_name`` under the name ``'kaijie'``, and hands the
    payload to ``self.partition_intermediate_rst`` together with
    ``self.num_rst_partitions``.

    Args:
        input_source: Indexable whose first element is a mapping that
            contains a ``'streaming_data'`` entry.

    Returns:
        Whatever ``self.partition_intermediate_rst`` returns for the
        payload.
    """
    # Only the 'streaming_data' entry is consumed here; any other keys of
    # the first source entry are ignored.
    payload = input_source[0]['streaming_data']
    debug_print_by_name('kaijie', str(payload))
    return self.partition_intermediate_rst(payload, self.num_rst_partitions)
def partition(self):
    """Group the records in ``self.data`` by the hash of their first field.

    Each record is bucketed under ``self.hash_func(record[0])``. Records
    keep their original relative order inside a bucket, and buckets appear
    in the order their hash code was first seen.

    Returns:
        dict: Maps each hash code to the list of records that produced it.
        Empty when ``self.data`` is empty.
    """
    buckets = {}
    debug_print_by_name('wentao', str(self.data))
    for record in self.data:
        # record[0] is the field fed to the hash function; the whole
        # record (not just that field) is stored in the bucket.
        buckets.setdefault(self.hash_func(record[0]), []).append(record)
    return buckets
def get(self, input_source):
    """Return this stage's data, computing and caching it on first use.

    When ``self.data`` is already truthy it is returned as-is. Otherwise
    the parent's elements are fetched with ``self.parent.get``, each one
    is passed through ``self.func``, and the mapped list is partitioned
    with ``self.partition_intermediate_rst`` before being stored in
    ``self.data``.

    Args:
        input_source: Passed through unchanged to ``self.parent.get``.

    Returns:
        The cached or freshly partitioned result, or ``None`` when the
        parent has nothing to offer (``self.parent.get`` returned
        ``None``).
    """
    if self.data:
        # Cache hit: the stored value was partitioned when it was computed.
        return self.data

    upstream = self.parent.get(input_source)
    if upstream is None:
        return None

    mapped = [self.func(item) for item in upstream]
    debug_print_by_name('wentao', str(mapped))

    # Store only the final, partitioned result so that a failure while
    # partitioning cannot leave an unpartitioned list behind in the cache.
    self.data = self.partition_intermediate_rst(mapped, self.num_rst_partitions)
    return self.data