예제 #1
0
def parse_file(file_name, cores, jobs_per_core, stats):
    """
    Parse 'file_name' in parallel chunks and print one result per count name.

    The file is divided into int('cores' * 'jobs_per_core') byte ranges using
    file_offsets, and parse_chunk is queued for each range through a pprocess
    queue created with limit='cores'. One map_count process is started for
    each entry of the module-level 'count_names'. Every value yielded by the
    queue is forwarded to all of those processes; afterwards each process is
    sent None and replies with the path of a pickle file, whose contents are
    printed.

    'stats' must provide an '_output' method accepting a progress message.

    The temporary working directory is removed when the function finishes,
    including when an exception is raised part-way through.
    """
    file_size = os.path.getsize(file_name)
    chunks = int(cores * jobs_per_core)

    queue = pprocess.Queue(limit=cores)
    parse_chunk_async = queue.manage(pprocess.MakeParallel(parse_chunk))

    temp_dir = tempfile.mkdtemp('.wf2')
    try:
        mappers = {}
        for count_name in count_names:
            mappers[count_name] = pprocess.start(map_count, count_name, temp_dir)

        # Chunk numbers start at 1, matching the counter the workers receive.
        for c, (start, end) in enumerate(file_offsets(file_size, chunks), 1):
            parse_chunk_async(file_name, temp_dir, start, end, c)

        stats._output('all map jobs queued')
        for job_id in queue:
            stats._output('map job finished: pid=%d' % job_id)
            for count_name in count_names:
                mappers[count_name].send(job_id)

        stats._output('all map jobs finished')
        for count_name in count_names:
            mapper = mappers[count_name]
            # None tells the worker that no further job ids will arrive.
            mapper.send(None)
            # NOTE: the pickle is read from our own temp_dir; never point
            # this at a file that could come from an untrusted source.
            with open(mapper.receive(), 'rb') as result_file:
                result = pickle.load(result_file)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(result)
            stats._output('reduce job finished: name=%s' % count_name)
    finally:
        # Do not leave the working directory behind if anything above fails.
        shutil.rmtree(temp_dir)
예제 #2
0
 def activate(self):
     """
     Start 'self.run' through pprocess.start, passing two positional None
     values plus named1=None and named2=None, and return a new
     pprocess.Exchange to which the resulting channel has been added.
     """
     worker = pprocess.start(self.run, None, None, named1=None, named2=None)
     monitor = pprocess.Exchange()
     monitor.add(worker)
     return monitor
예제 #3
0
def fill_window(filenames, i, window_size, exchange, slice_size, delay=None):

    """
    Top up the given 'exchange' with parser channels until it holds
    'window_size' active channels or the 'filenames' run out.

    Starting at index 'i', successive slices of at most 'slice_size'
    filenames are each handed to a new apply_parser channel, together with
    the optional 'delay', and that channel is added to 'exchange'.

    Return the index of the first filename that was not handed out.
    """

    total = len(filenames)
    # The number of free slots is fixed on entry; channels added below are
    # tracked by the 'started' counter rather than by re-querying the exchange.
    free_slots = window_size - len(exchange.active())
    started = 0
    while i < total and started < free_slots:
        upper = i + slice_size
        exchange.add(start(apply_parser, filenames[i:upper], delay))
        i = upper
        started += 1
    return i
 def activate(self):
     """Start 'self.run' through pprocess.start and return the channel."""
     return pprocess.start(self.run)
예제 #5
0
    def activate(self):
        """Hand 'self.run' to pprocess.start and return the channel it gives back."""
        return pprocess.start(self.run)
예제 #6
0
 def activate(self):
     """Launch 'self.run' with pprocess.start; the returned channel is the result."""
     return pprocess.start(self.run)
 def activate(self):
   """Return the channel produced by calling pprocess.start on 'self.run'."""
   return pprocess.start(self.run)
예제 #8
0
 def activate(self):
     """Pass 'self.run' to pprocess.start and return the resulting channel."""
     return pprocess.start(self.run)
예제 #9
0
 def activate(self):
     """
     Start 'self.run' via pprocess.start (two positional None arguments,
     named1=None, named2=None), add the resulting channel to a fresh
     pprocess.Exchange and return that exchange.
     """
     started = pprocess.start(self.run,
                              None, None,
                              named1=None, named2=None)
     hub = pprocess.Exchange()
     hub.add(started)
     return hub
예제 #10
0
#!/usr/bin/env python

import pprocess

class Ex(pprocess.Exchange):

    """
    An exchange which answers every value received from a channel with that
    value minus one.
    """

    def store_data(self, channel):

        """
        Receive a value from 'channel', print it, and send back the value
        decremented by one.

        NOTE(review): presumably invoked by pprocess.Exchange whenever
        'channel' has data ready - confirm against the pprocess documentation.
        """

        data = channel.receive()
        # Single-argument print(...) produces the same text on Python 2 and 3,
        # unlike the Python 2-only print statement.
        print("Parent: %s" % (data,))
        channel.send(data - 1)
        print("Parent sent")

def child(channel, data):

    """
    Repeatedly print 'data', send it over 'channel' and replace it with the
    value received in reply, stopping as soon as the current value is 0.

    Nothing is sent or received when 'data' is 0 on entry.
    """

    while data != 0:
        # Single-argument print(...) produces the same text on Python 2 and 3,
        # unlike the Python 2-only print statement.
        print("Child: %s" % (data,))
        channel.send(data)
        print("Child sent")
        data = channel.receive()

# Create the exchange, add to it the channel returned by starting child() with
# an initial value of 20, then call finish() on the exchange.
# NOTE(review): finish() presumably blocks until no channels remain active -
# confirm against the pprocess documentation.
ex = Ex()
ex.add(pprocess.start(child, 20))
ex.finish()

# vim: tabstop=4 expandtab shiftwidth=4
예제 #11
0
#!/usr/bin/env python

from pprocess import start

def loop(channel, limit):

    """
    Print each value received from 'channel' for as long as the values stay
    below 'limit', then send "Done" over the channel.

    The first value which is not below 'limit' ends the loop and is not
    printed.
    """

    # Single-argument print(...) produces the same text on Python 2 and 3,
    # unlike the Python 2-only print statement.
    print("loop to %s" % (limit,))
    i = channel.receive()
    while i < limit:
        print(i)
        i = channel.receive()
    channel.send("Done")

if __name__ == "__main__":
    limit = 100
    channel = start(loop, limit)
    # Send 0..limit inclusive: the final value (== limit) is the one that
    # makes loop() leave its while loop and reply.
    for i in range(limit + 1):
        channel.send(i)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(channel.receive())

# vim: tabstop=4 expandtab shiftwidth=4
예제 #12
0
    def activate(self):
        """Run 'self.run' via pprocess.start and return the channel obtained."""
        return pprocess.start(self.run)