def updateAll(self):
    """Run a full repository update and log its configuration and timing.

    Logs the pool/queue/preprocessor settings read from this instance,
    updates the Monav repository and finally logs how long the Monav
    update and the whole run took.
    """

    def describe_duration(seconds):
        """Return the pretty-printed duration, with raw seconds appended
        once it is longer than a minute (for exact benchmarking after the
        pretty time switches to larger units)."""
        if seconds > 60:
            return "%s (%d s)" % (utils.prettyTimeDiff(seconds), seconds)
        return utils.prettyTimeDiff(seconds)

    log.info("## starting repository update ##")
    run_started = time.time()

    # report the parallelism related settings
    log.info(
        "# CPU count: %d, proc. pool: %d, pack pool: %d",
        self.cpu_count,
        self.processing_pool_size,
        self.packaging_pool_size,
    )
    log.info(
        "# queues: source: %d, pack: %d, publish: %d",
        self.source_queue_size,
        self.packaging_queue_size,
        self.publish_queue_size,
    )

    # Monav specific settings
    log.info("## updating Monav repository")
    log.info('# monav preprocessor threads: %d', self.monav_preprocessor_threads)
    log.info('# max parallel pp. per package: %d', self.monav_parallel_threads)
    threshold_mb = self.monav_parallel_threshold
    if threshold_mb:
        log.info('# parallel pp. threshold: %d MB' % threshold_mb)

    # the actual Monav repository update
    monav_started = time.time()
    monav_repository = MonavRepository(self)
    monav_repository.update()
    monav_seconds = int(time.time() - monav_started)
    log.info("## Monav repository updated in %s " % describe_duration(monav_seconds))

    # the summary always includes the raw second count
    log.info("## Summary:")
    log.info(
        "## Monav repository updated in %s (%d s)"
        % (utils.prettyTimeDiff(monav_seconds), monav_seconds)
    )

    # duration of the whole update run
    total_seconds = int(time.time() - run_started)
    log.info("## Repository updated in %s " % describe_duration(total_seconds))
    log.info("## repository update finished ##")
# NOTE(review): the two prints below look like the tail of a preceding block
# (presumably the failure branch of the log folder setup) whose header is not
# visible in this chunk - confirm their indentation against the full file.
print(log_folder)
print("log data for this update run might not be gathered or the update run might fail outright")

# every step writes its output to its own log file inside the log folder
planet_update_log = os.path.join(log_folder, "update_planet.log")
planet_split_log = os.path.join(log_folder, "split_planet.log")
continents_split_log = os.path.join(log_folder, "split_continents.log")

print("starting modRana repository source data update")
start=time.time()

# step 1: update the planet file, stdout of the tool is redirected (">")
# to the planet update log, replacing its previous content
print("updating the planet osm file")
planet_update_rc = os.system("./tools/update_planet.py>%s" % planet_update_log)
# only do the sanity check if the download was successful
if planet_update_rc == 0:
    # the sanity check output is appended (">>") to the same log and its
    # return code replaces the one from the update itself
    planet_update_rc = os.system("./tools/sanity_check_update_planet.py>>%s" % planet_update_log)
dt = int(time.time() - start)
print("planet osm file update finished in %s" % prettyTimeDiff(dt))
# a non-zero code from either the update or its sanity check aborts the run
if planet_update_rc > 0:
    print("ERROR: planet file update failed, aborting source data update")
    exit(1)

# step 2: split the planet file; the return code of the split script itself
# is discarded, only the following sanity check decides if the run continues
print("splitting the planet into continent sized chunks")
start1=time.time()
os.system("./tools/split_planet.sh>%s" % planet_split_log)
rc = os.system("./tools/sanity_check_split_planet.py>>%s" % planet_split_log)
dt = int(time.time() - start1)
print("planet splitting finished in %s" % prettyTimeDiff(dt))
if rc > 0:
    print("ERROR: planet splitting sanity check failed, aborting source data update")
    exit(1)

# step 3: split the continents (the rest of this step is outside this chunk)
print("splitting the continents into regions")
def process(self, threads=(1, 1), parallel_threshold=None):
    """Process the PBF extract into Monav routing data.

    The Monav preprocessor is run once for each routing mode (car, bike,
    pedestrian). The runs are distributed over a small set of worker
    processes fed from a joinable queue; every run writes to its own
    temporary sub-folder and its result folder is then moved to the
    temporary storage folder of this package.

    :param threads: tuple ``(monav_threads, max_parallel_preprocessors)``;
        the first value is passed to each preprocessor as ``-t``, the
        second one is the number of preprocessors running at once
    :param parallel_threshold: if not None, a source file bigger than this
        many MB forces the preprocessors to run one at a time
    :returns: True if the processing finished, False if an exception was
        raised or any of the preprocessor tasks failed
    """
    monav_threads, max_parallel_preprocessors = threads
    # check the parallel threshold
    if parallel_threshold is not None:
        try:
            file_size = os.path.getsize(self._source_data_path)
        except OSError:
            # unreadable source file is treated as empty here, the actual
            # processing will report the problem
            file_size = 0
        # check if file size in MB is larger than threshold
        if (file_size / (2 ** 20)) > parallel_threshold:
            # if threshold is crossed, don't run preprocessors in parallel
            max_parallel_preprocessors = 1

    try:
        if parallel_threshold is None and max_parallel_preprocessors == 1:
            process_log.info('processing %s', self.name)
        elif max_parallel_preprocessors == 1:
            process_log.info('processing %s in 1 thread (threshold reached)', self.name)
        else:  # >1
            process_log.info('processing %s in %d threads', self.name, max_parallel_preprocessors)
        start_time = time.time()

        input_file = self._source_data_path
        output_folder = self._temp_storage_path
        base_INI_Path = os.path.join(self._helper_path, "base.ini")
        preprocessor_path = self._preprocessor_path

        def get_task(mode_name, mode_profile, index):
            """Prepare a task that runs the preprocessor in a temporary
            folder separate for every preprocessor instance and then moves
            the result to the package temporary storage folder.

            NOTE: the separate folder is used to avoid multiple
            preprocessors mixing their temporary data
            """
            temp_output_folder = os.path.join(output_folder, str(index))
            result_path = os.path.join(temp_output_folder, "routing_%s" % mode_name)
            # compile arguments
            args = [
                '%s' % preprocessor_path,
                '-di',
                '-dro="%s"' % mode_name,
                '-t=%d' % monav_threads,
                '--verbose',
                '--settings="%s"' % base_INI_Path,
                '--input="%s"' % input_file,
                '--output="%s"' % temp_output_folder,
                '--name="%s"' % self.name,
                '--profile="%s"' % mode_profile,
                '-dd',
            ]
            return args, result_path, self._temp_storage_path

        def run_preprocessor(queue, failure_count):
            """Worker loop run in an internal pool inside the Monav package.

            Takes tasks from the queue until the shutdown signal arrives.
            Every task is always marked as done, even if it fails, so that
            the parent waiting on queue.join() can never block forever.
            """
            while True:
                current_task = queue.get()
                if current_task == repo.SHUTDOWN_SIGNAL:
                    queue.task_done()
                    return
                try:
                    args, from_path, to_path = current_task
                    # create the independent per-preprocessor path
                    os.makedirs(from_path)
                    # dump the stdout & stderr output of the command to /dev/null
                    with open(os.devnull, "w") as dev_null:
                        # the arguments carry shell style quoting, so they are
                        # joined to a single command line and run by the shell
                        subprocess.call(" ".join(args), shell=True,
                                        stdout=dev_null, stderr=dev_null)
                    # move the results to the main folder
                    shutil.move(from_path, to_path)
                except Exception:
                    process_log.exception('monav package: preprocessor task failed')
                    with failure_count.get_lock():
                        failure_count.value += 1
                finally:
                    queue.task_done()

        tasks = [
            get_task("car", "motorcar", 0),
            get_task("bike", "bicycle", 1),
            get_task("pedestrian", "foot", 2),
        ]

        # at least one worker is needed, otherwise nobody would consume
        # the queue and the join() below would never return
        worker_count = max(1, max_parallel_preprocessors)
        preproc_queue = mp.JoinableQueue()
        # shared counter the workers use to report failed tasks
        failure_count = mp.Value('i', 0)
        for _ in range(worker_count):
            worker = mp.Process(target=run_preprocessor,
                                args=(preproc_queue, failure_count))
            worker.daemon = True
            worker.start()
        for task in tasks:
            preproc_queue.put(task)
        # wait for the processes to finish
        preproc_queue.join()
        # shut them down - one shutdown signal per worker
        for _ in range(worker_count):
            preproc_queue.put(repo.SHUTDOWN_SIGNAL)
        preproc_queue.join()

        if failure_count.value:
            # handled by the except clause below -> logged & False returned
            raise RuntimeError('%d preprocessor task(s) failed' % failure_count.value)

        td = int(time.time() - start_time)
        process_log.info('processed %s in %s', self.name, utils.prettyTimeDiff(td))
        return True
    except Exception:
        message = 'monav package: Monav routing data processing failed\n'
        message += 'name: %s' % self.name
        process_log.exception(message)
        return False