import os
import pickle
import shutil
import tempfile
import time

import pprocess

# parse_chunk, map_count, file_offsets and count_names are defined elsewhere
# in the same module.

def parse_file(file_name, cores, jobs_per_core, stats):
    file_size = os.path.getsize(file_name)
    chunks = int(cores * jobs_per_core)

    # Limit the number of concurrently running map jobs to the number of cores.
    queue = pprocess.Queue(limit=cores)
    parse_chunk_async = queue.manage(pprocess.MakeParallel(parse_chunk))
    temp_dir = tempfile.mkdtemp('.wf2')

    # Start one long-running counter process per count name.
    mappers = {}
    for count_name in count_names:
        mappers[count_name] = pprocess.start(map_count, count_name, temp_dir)

    # Queue a map job for each chunk of the input file.
    c = 0
    for (start, end) in file_offsets(file_size, chunks):
        c += 1
        parse_chunk_async(file_name, temp_dir, start, end, c)
    stats._output('all map jobs queued')

    # As each map job finishes, notify every counter process of the job.
    for job_id in queue:
        start_reduce_time = time.time()
        stats._output('map job finished: pid=%d' % job_id)
        for c in range(len(count_names)):
            mappers[count_names[c]].send(job_id)
    stats._output('all map jobs finished')

    # Signal end of input to each counter process and print its pickled result.
    for (mapper, count_name) in ((mappers[count_name], count_name) for count_name in count_names):
        mapper.send(None)
        print pickle.load(open(mapper.receive(), 'rb'))
        stats._output('reduce job finished: name=%s' % count_name)

    shutil.rmtree(temp_dir)
def activate(self):
    channel = pprocess.start(self.run, None, None, named1=None, named2=None)
    exchange = pprocess.Exchange()
    exchange.add(channel)
    return exchange
def fill_window(filenames, i, window_size, exchange, slice_size, delay=None):

    """
    Fill a "window" of channels using the given 'filenames', starting from
    index 'i' in that list, choosing a number of channels limited to
    'window_size' and adding those channels to the given 'exchange'. Each
    channel will be employed by a parser which will be given a number of
    filenames limited to 'slice_size', and which will be requested to wait
    periodically if specified by the optional 'delay'.
    """

    number = 0
    limit = len(filenames)
    active = len(exchange.active())

    while i < limit and number < window_size - active:
        j = i + slice_size
        channel = start(apply_parser, filenames[i:j], delay)
        exchange.add(channel)
        i = j
        number += 1

    return i
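# A minimal driver sketch showing one way fill_window might be used; it is not
# part of the original code. 'ParserExchange', 'process_all', 'filenames',
# 'window_size', 'slice_size' and 'delay' are illustrative names, and results
# are simply printed. Only fill_window itself, plus pprocess.Exchange and its
# store()/store_data()/active() methods, come from the snippet above and the
# pprocess API.

import pprocess

class ParserExchange(pprocess.Exchange):

    def store_data(self, channel):
        # Collect whatever the parser sends back on its channel.
        print channel.receive()

def process_all(filenames, window_size, slice_size, delay=None):
    exchange = ParserExchange()

    # Start an initial window of parsers, then keep the window topped up as
    # parsers finish until every slice of filenames has been handed out.
    i = fill_window(filenames, 0, window_size, exchange, slice_size, delay)
    while exchange.active():
        exchange.store()
        i = fill_window(filenames, i, window_size, exchange, slice_size, delay)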
def activate(self):
    channel = pprocess.start(self.run)
    return channel
def activate(self): # print "XXXXXXX" channel = pprocess.start(self.run) # print "YYYYYYY" return channel
def activate(self): # print ("XXXXXXX") channel = pprocess.start(self.run) # print ("YYYYYYY") return channel
#!/usr/bin/env python

import pprocess

class Ex(pprocess.Exchange):

    def store_data(self, channel):
        data = channel.receive()
        print "Parent:", data
        channel.send(data - 1)
        print "Parent sent"

def child(channel, data):
    while data != 0:
        print "Child:", data
        channel.send(data)
        print "Child sent"
        data = channel.receive()

ex = Ex()
ex.add(pprocess.start(child, 20))
ex.finish()

# vim: tabstop=4 expandtab shiftwidth=4
#!/usr/bin/env python

from pprocess import start

def loop(channel, limit):
    print "loop to", limit
    i = channel.receive()
    while i < limit:
        print i
        i = channel.receive()
    channel.send("Done")

if __name__ == "__main__":
    limit = 100
    channel = start(loop, limit)
    for i in range(0, limit + 1):
        channel.send(i)
    print channel.receive()

# vim: tabstop=4 expandtab shiftwidth=4