def execute(self):
    self._pool = Pool(config.pool_processors)

    # Write the header-only csv files before any worker appends to them.
    for parser in host_parsers + node_parsers:
        self._writer.write_header_csv(parser.file_name, parser.csv_header)
    logging.info('Created all empty csv files')

    # Parse the run log in parallel, one file chunk per task.
    self._pool.starmap(_parse, zip(
        repeat(self._writer),
        repeat(config.run_log),
        repeat('simcoin'),
        Chunker.chunkify(config.run_log, config.file_chunk_size),
        repeat(host_parsers),
    ))

    # Parse every node's log file the same way.
    for node in self._context.nodes.values():
        self._pool.starmap(_parse, zip(
            repeat(self._writer),
            repeat(node.get_log_file()),
            repeat(node.name),
            Chunker.chunkify(node.get_log_file(), config.file_chunk_size),
            repeat(node_parsers),
        ))

    self._pool.close()
    logging.info('Finished parsing of run_log and all node logs')
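# A minimal sketch of the writer helper used above (hypothetical, since the
# writer class is not shown here). It assumes csv_header is a list of column
# names and that the worker processes later append rows to the same files.
import csv

class Writer:
    def write_header_csv(self, file_name, csv_header):
        # Create (or truncate) the file and write only the header row.
        with open(file_name, 'w', newline='') as f:
            csv.writer(f).writerow(csv_header)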
def execute(self):
    self._pool = Pool(config.pool_processors)

    # Create the header-only csv files up front; the worker processes
    # can only append rows, so the files must exist before the pool starts.
    for parser in instance_parsers + node_parsers:
        self._writer.write_header_csv(parser.file_name, parser.csv_header)
    logging.info('Created blank csv files')

    self._pool.starmap(_parse, zip(
        repeat(self._writer),
        repeat(config.run_log),
        repeat('test'),
        Chunker.chunkify(config.run_log, config.file_chunk_size),
        repeat(instance_parsers),
    ))

    for node in self._context.nodes.values():
        self._pool.starmap(_parse, zip(
            repeat(self._writer),
            repeat(node.get_log_file()),
            repeat(node.name),
            Chunker.chunkify(node.get_log_file(), config.file_chunk_size),
            repeat(node_parsers),
        ))

    self._pool.close()
    logging.info('Parsing done')
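# A sketch of the _parse worker both execute() variants dispatch to; this is
# an assumption, since its definition is not included. starmap unpacks each
# zipped tuple into these five arguments, with the chunk being the
# (start, size) pair produced by Chunker.chunkify. The parser and writer
# calls below (from_log_line, append_csv) are hypothetical names used only
# for illustration.
def _parse(writer, log_file, name, chunk, parsers):
    chunk_start, chunk_size = chunk
    with open(log_file) as f:
        f.seek(chunk_start)
        lines = f.read(chunk_size).splitlines()
    for line in lines:
        for parser in parsers:
            parsed = parser.from_log_line(line, name)
            if parsed is not None:
                writer.append_csv(parser.file_name, parsed)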
def loadMovieLensParallel(self):
    self.data = {}

    # Each MovieLens file gets its own handler; the file is split into
    # chunks and every chunk becomes one asynchronous worker job.
    for name, handler in [("u.data", self.process_data),
                          ("u.item", self.process_item),
                          ("u.user", self.process_users)]:
        # init objects
        pool = mp.Pool(self.cores)
        jobs = []

        # create jobs
        fname = self.path + name
        for chunkStart, chunkSize in Chunker.chunkify(fname):
            jobs.append(pool.apply_async(handler, (fname, chunkStart, chunkSize)))

        # wait for all jobs to finish
        for job in jobs:
            job.get()

        # clean up
        pool.close()
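# A hypothetical chunk handler for the MovieLens 100k u.data file, which is
# tab-separated (user id, item id, rating, timestamp). Shown standalone; in
# the class above it would take self as its first parameter. Each worker
# seeks to its byte offset and parses its slice line by line, assuming
# chunkify aligns chunk boundaries to newlines. The parsed rows travel back
# to the parent process through job.get().
def process_data(fname, chunkStart, chunkSize):
    ratings = []
    with open(fname) as f:
        f.seek(chunkStart)
        for line in f.read(chunkSize).splitlines():
            user, item, rating, timestamp = line.split('\t')
            ratings.append((int(user), int(item), float(rating), int(timestamp)))
    return ratings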
def readMovies(self, path='/ml-25m'):
    self.data = {}

    # init objects
    pool = mp.Pool(8)
    jobs = []

    # create jobs
    fname = self.path + path + "/ratings.csv"
    for chunkStart, chunkSize in Chunker.chunkify(fname):
        jobs.append(pool.apply_async(self.process_data, (fname, chunkStart, chunkSize)))

    # wait for all jobs to finish
    for job in jobs:
        job.get()

    # clean up
    pool.close()
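# A sketch of the Chunker.chunkify helper all four methods rely on
# (hypothetical, as its source is not included here). It walks the file in
# roughly size-byte steps and extends each chunk to the next newline, so no
# worker ever receives half a line. The default size covers the one-argument
# calls above; the run-log parsers pass an explicit config.file_chunk_size.
import os

class Chunker:
    @staticmethod
    def chunkify(fname, size=1024 * 1024):
        file_end = os.path.getsize(fname)
        with open(fname, 'rb') as f:
            chunk_start = f.tell()
            while chunk_start < file_end:
                f.seek(size, os.SEEK_CUR)  # jump ahead by the nominal chunk size
                f.readline()               # then read on to the end of the current line
                chunk_end = min(f.tell(), file_end)
                yield chunk_start, chunk_end - chunk_start
                chunk_start = chunk_end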