def start_navitia_with_single_coverage(navitia_docker_compose_file_path, navitia_docker_compose_file_name, coverage_name, extend_wait_time=False):
    """
    Start the Navitia server with a single coverage using docker-compose.

    :param navitia_docker_compose_file_path: path where the docker-compose file exists
    :param navitia_docker_compose_file_name: name of the docker-compose file to run
    :param coverage_name: the coverage to bring up
    :param extend_wait_time: whether an extended wait should be applied; set True the
        first time the Navitia docker compose is started (images are downloaded)
    :return: whether Navitia was started successfully with the requested coverage
    """
    _log.info("Attempting to start Navitia with %s coverage", coverage_name)

    # Run docker-compose detached from our output streams so its logs don't flood stdout
    navitia_docker_start_command = "docker-compose -f" + navitia_docker_compose_file_name + \
                                   " -p navitia-docker-compose up --remove-orphans"
    subprocess.Popen(navitia_docker_start_command, shell=True, cwd=navitia_docker_compose_file_path,
                     stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)

    # A longer wait is required on first start-up because images are being downloaded
    t_wait = 60 * 5 if extend_wait_time else 60 * 3
    _log.info("Waiting %s seconds to validate Navitia docker is up and running", t_wait)
    time.sleep(t_wait)

    # Report whether the coverage answers its status endpoint as "running"
    return check_coverage_running(get_navitia_url_for_cov_status(coverage_name), coverage_name)
def backup_past_coverage(container, coverage_name):
    """
    Copy a given coverage graph from the Navitia worker container to the local host.

    :param container: Navitia worker container
    :param coverage_name: the coverage graph name to copy (without the .nav.lz4 suffix)
    """
    local_processed_folder = Path(os.getcwd()).parent / "processed"
    _log.info("Going to copy %s.nav.lz4 to %s on local host", coverage_name, local_processed_folder)

    # Fetch the graph archive stream and its size from the container
    bits, stat = container.get_archive('/srv/ed/output/' + coverage_name + '.nav.lz4')
    size = stat["size"]

    # Progress bar for the transfer
    pbar = createProgressBar(size, action="Transferring")

    # Stream the archive chunks into the local file; `with` guarantees the file is
    # closed even if the transfer fails mid-way (the original left the handle open)
    size_iterator = 0
    with open(os.path.join(local_processed_folder, coverage_name + '.nav.lz4'), 'wb') as local_graph_file:
        for chunk in bits:
            if chunk:
                file_write_update_progress_bar(chunk, local_graph_file, pbar, size_iterator)
                size_iterator += len(chunk)
    pbar.finish()
    _log.info("Finished copying %s.nav.lz4 to %s on local host", coverage_name, os.getcwd())
def main(gtfsdate):
    """
    Bring up the Navitia docker environment with the on-demand coverage for a GTFS date.

    :param gtfsdate: GTFS date used to derive the coverage name and docker-compose config
    """
    # Config variables to be moved to a config file downstream
    coverage_name, navitia_docker_compose_file_path, navitia_docker_compose_file_name = \
        utils.get_config_params(gtfsdate)
    try:
        # Stop any docker containers that are currently running
        docker_client = utils.get_docker_service_client()
        containers = docker_client.containers.list()
        if len(containers) > 0:
            print(containers)
            utils.stop_all_containers(utils.get_docker_service_client())

        # Get a fresh docker service client and look for a running worker
        docker_client = utils.get_docker_service_client()
        containers = docker_client.containers.list(filters={"name": "worker"})

        if len(containers) == 0:
            _log.info("Navitia docker containers are down, bringing them up with on_demand coverage ")
            utils.start_navitia_with_single_coverage(navitia_docker_compose_file_path,
                                                     navitia_docker_compose_file_name, coverage_name)
            containers = docker_client.containers.list(filters={"name": "worker"})
        print(containers)
    except Exception:
        # Re-raise the original exception; the previous `raise Exception` replaced it
        # with a bare, message-less Exception and discarded the traceback
        raise
def delete_file_from_host(file_name):
    """
    Delete a file from the host running this script.

    :param file_name: the file name to be deleted
    """
    # Remove only when the path exists and is a regular file; missing files are a no-op
    target = Path(file_name)
    if target.is_file():
        target.unlink()
    _log.info("Finished deleting %s from host", file_name)
def stop_all_containers(docker_client):
    """
    Stop every running Docker container.

    :param docker_client: docker client
    """
    _log.info("Going to stop all Docker containers")
    running_containers = docker_client.containers.list()
    for running_container in running_containers:
        running_container.stop()
    _log.info("Stopped all Docker containers")
def delete_file_from_container(container, file_name):
    """
    Delete a file from the Navitia worker container's /srv/ed/output/ folder.

    :param container: Navitia worker container
    :param file_name: the name of the file to be removed
    :return: True when the file was deleted, False otherwise
    """
    # Shell out inside the container so the path resolves there
    delete_command = "/bin/sh -c \"rm " + file_name + "\""
    exit_code, output = container.exec_run(cmd=delete_command, stdout=True, workdir="/srv/ed/output/")
    if exit_code != 0:
        _log.error("Couldn't delete %s graph", file_name)
        return False
    _log.info("Finished deleting %s from container %s", file_name, container.name)
    # Explicit success value for symmetry with the False failure branch and with
    # move_current_to_past (the original implicitly returned None on success)
    return True
def get_file_from_url_http(url, file_name, file_path, _log):
    """
    Download a file over HTTP to <parent-of-cwd>/<file_path>/<file_name>, retrying hourly.

    :param url: HTTP url to download from - not an FTP URL
    :param file_name: the local name to save the download as
    :param file_path: folder, relative to the parent of the working directory, to save into
    :param _log: logger to report progress and errors to
    :return: None; the file is written to file_path/file_name
    :raises Exception: after max_download_attemtps failed attempts (~24 hours)
    """
    # Prepare the local target path
    local_file_path_and_name = Path(os.getcwd()).parent / file_path / file_name
    _log.info("Going to download the latest osm from %s to %s", url, local_file_path_and_name)

    download_complete = False
    download_attempts = 1
    max_download_attemtps = 24
    while not download_complete:
        # After a failed attempt, sleep an hour before retrying
        # (bounds now use max_download_attemtps instead of a hard-coded 24)
        if not download_complete and max_download_attemtps > download_attempts > 1:
            _log.error(
                "%s is unreachable. Sleeping for 60 minutes and trying again. This is attempt %s out of "
                "%s attempts", url, download_attempts, max_download_attemtps)
            time.sleep(60 * 60)
        if not download_complete and download_attempts > max_download_attemtps:
            _log.error(
                "%s is unreachable for more than 24 hours. Aborting update", url)
            raise Exception
        download_attempts += 1
        try:
            r = requests.get(url, stream=True)

            # Progress bar sized from the server-reported length
            size = int(r.headers['Content-Length'])
            pbar = createProgressBar(size)

            # Stream the body to disk; `with` closes the file even when a chunk
            # read fails mid-transfer (the original leaked the handle on error)
            global size_iterator
            size_iterator = 0
            with open(local_file_path_and_name, 'wb') as file:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        file_write_update_progress_bar(chunk, file, pbar)
            pbar.finish()
            _log.info("Finished loading latest OSM to: %s", local_file_path_and_name)
            download_complete = True
            return
        except Exception:
            # Any failure (network, missing header, disk) falls through to the retry loop
            continue
def generate_gtfs_with_transfers(gtfs_file_name, gtfs_file_path):
    """
    Add a processed transfers.txt to a GTFS ZIP, as required by Navitia's server for
    extending transfers between stops in graph calculation.

    :param gtfs_file_name: GTFS zip file name
    :param gtfs_file_path: GTFS zip file path
    """
    _log.info("Extracting stops.txt and computing transfers.txt")
    # The extracted GTFS contents folder carries the zip's name minus the ".zip" suffix
    extracted_contents_folder = os.path.join(gtfs_file_path, gtfs_file_name[:-4])
    transfers_output_path = generate_transfers_file(extracted_contents_folder)
    # Append the computed transfers.txt into the existing GTFS archive
    gtfs_file_path_name = os.path.join(gtfs_file_path, gtfs_file_name)
    with zipfile.ZipFile(gtfs_file_path_name, 'a') as archive:
        archive.write(transfers_output_path, arcname="transfers.txt")
    _log.info("Added transfers.txt to %s", gtfs_file_path_name)
def move_current_to_past(container, source_cov_name, dest_cov_name):
    """
    Rename the Navitia graph of the source coverage to the destination coverage so the
    change is applied on the next restart.

    :param container: the worker container of Navitia
    :param source_cov_name: coverage to take the graph from (usually "default")
    :param dest_cov_name: coverage to move the graph to (e.g. "secondary-cov")
    :return: True when the move succeeded; a RuntimeError is raised otherwise
    """
    rename_command = "/bin/sh -c \"mv " + source_cov_name + ".nav.lz4 " + dest_cov_name + ".nav.lz4\""
    exit_code, output = container.exec_run(cmd=rename_command, stdout=True, workdir="/srv/ed/output/")
    if exit_code == 0:
        _log.info("Changed the name of %s.nav.lz4 to %s.nav.lz4", source_cov_name, dest_cov_name)
        return True
    # Non-zero exit from mv inside the container: report and abort
    _log.error("Couldn't change %s to %s", source_cov_name, dest_cov_name)
    raise RuntimeError
def generate_ondemand_docker_config_file(navitia_docker_compose_file_path, navitia_docker_compose_file_name):
    """
    Create a custom docker-compose file for the on-demand environment by replacing every
    "default" coverage reference in docker-compose.yml with "ondemand-<cfg.gtfsdate>".

    :param navitia_docker_compose_file_path: folder containing docker-compose.yml
    :param navitia_docker_compose_file_name: name of the generated custom compose file
    """
    # Read the stock compose file; `with` replaces the manual open/read/close triplet
    with open(os.path.join(navitia_docker_compose_file_path, 'docker-compose.yml'), mode='r') as navitia_docker_compose_file:
        navitia_docker_compose_file_contents = navitia_docker_compose_file.read()

    # Re-target the coverage name at the dated on-demand coverage
    custom_docker_file_contents = navitia_docker_compose_file_contents.replace("default", "ondemand-" + cfg.gtfsdate)

    # `with` closes the file on exit; the original also called close() redundantly inside the block
    with open(os.path.join(navitia_docker_compose_file_path, navitia_docker_compose_file_name), mode='w+') as custom_docker_file:
        custom_docker_file.write(custom_docker_file_contents)
    _log.info("Created custom docker-compose file: %s", navitia_docker_compose_file_name)
def unzip_gtfs(gtfs_zip_file_name, gtfspath, _log):
    """
    Unzip a GTFS zip file into a same-named folder (without ".zip") under gtfspath.

    :param gtfs_zip_file_name: the GTFS zip file name, e.g. "israel20190225.zip"
    :param gtfspath: folder, relative to the parent of the working directory, holding the zip
    :param _log: logger to report progress and errors to
    :raises Exception: when the zip file does not exist at the expected location
    """
    pardir = Path(os.getcwd()).parent
    gtfs_contets_folder = pardir / gtfspath / gtfs_zip_file_name
    if not os.path.isfile(gtfs_contets_folder):
        _log.error("%s does not exist - please check correct GTFS date is configured", gtfs_zip_file_name)
        # Carry the detail in the exception too, for callers that don't read the log
        # (the original raised a bare, message-less Exception)
        raise Exception("%s does not exist - please check correct GTFS date is configured" % gtfs_contets_folder)
    _log.info("Going to unzip %s file to %s", gtfs_zip_file_name, gtfspath)
    # Destination folder is the zip name without the trailing ".zip"
    dest_folder = pardir / gtfspath / gtfs_zip_file_name[:-4]
    if not os.path.exists(dest_folder):
        os.mkdir(dest_folder)
    shutil.unpack_archive(gtfs_contets_folder, extract_dir=dest_folder, format='zip')
    _log.info("Finished unzipping")
def validate_graph_changes_applied(coverage_name):
    """
    Validate that the coverage now has a start-of-production date different from before,
    by querying heat-map data for the current service date.

    :param coverage_name: the coverage to validate
    :return: whether the coverage answers heat-map queries for the current date
    """
    current_start_service_date = process_date.get_date_now()
    # Pick the time-map server URL by deployment target
    time_map_server_url = cfg.time_map_server_aws_url if cfg.ttm_server_on == "aws_ec2" \
        else cfg.time_map_server_local_url
    cov_sop_date = get_coverage_start_production_date(coverage_name)
    # Valid only when a production date exists AND the heat map answers for today
    heat_map_ok = cov_sop_date != "" and check_prod_date_is_valid_using_heat_map(
        time_map_server_url, coverage_name, current_start_service_date)
    if not heat_map_ok:
        _log.error("The %s coverage seems not to be up-to-date following update attempts."
                   "\n A call for heat map data with %s date returned no results",
                   coverage_name, current_start_service_date)
        return False
    _log.info("%s coverage is now updated with new start-of-production date %s\n."
              "Can be accessed via %s%s", coverage_name, current_start_service_date,
              time_map_server_url, coverage_name)
    return True
def copy_file_into_docker(container, dest_path, file_path, file_name):
    """
    Copy a given file into a destination folder of a Docker container.

    :param container: container object
    :param dest_path: destination folder path inside the container
    :param file_path: source path of the file on the host (relative to the parent of cwd)
    :param file_name: the file name to be copied
    :raises FileNotFoundError: when the container reports the copy failed
    """
    _log.info("Going to copy %s to %s at %s", file_name, container.name, dest_path)

    # Read the file contents; `with` closes the handle (the original leaked it and
    # re-used the variable `file` for both the handle and its bytes)
    with open(Path(os.getcwd()).parent / file_path / file_name, 'rb') as source_file:
        file_bytes = source_file.read()
    try:
        # docker's put_archive expects a tar stream, so wrap the file in an in-memory tar
        tar_stream = BytesIO()
        file_tar = tarfile.TarFile(fileobj=tar_stream, mode='w')
        tarinfo = tarfile.TarInfo(name=file_name)
        tarinfo.size = len(file_bytes)
        file_tar.addfile(tarinfo, BytesIO(file_bytes))
        file_tar.close()

        # Put in the container
        tar_stream.seek(0)
        success = container.put_archive(
            path=dest_path,
            data=tar_stream
        )
        if success:
            _log.info("Finished copying %s to %s at %s", file_name, container.name, dest_path)
        else:
            raise FileNotFoundError
    except FileNotFoundError as err:
        _log.error("Couldn't copy %s to %s at %s", file_name, container.name, dest_path)
        raise err
def is_cov_exists(container, coverage_name):
    """
    Check whether a coverage graph is present in /srv/ed/output of the given container.

    :param container: Navitia worker container
    :param coverage_name: the coverage to look for
    :return: whether the coverage name appears in the container's output folder listing
    """
    _log.info("Checking if %s exists in /srv/ed/output of %s", coverage_name, container.name)
    # List the output folder inside the container and search the raw listing text
    listing_command = "/bin/sh -c \"ls\""
    exit_code, output = container.exec_run(cmd=listing_command, stdout=True, workdir="/srv/ed/output/")
    exists = coverage_name in str(output)
    if exists:
        _log.info("%s exists in /srv/ed/output of %s", coverage_name, container.name)
        return exists
    _log.info("%s doesn't exists in /srv/ed/output of %s", coverage_name, container.name)
    return exists
def validate_auto_graph_changes_applied(coverage_name, default_coverage_name, default_cov_prev_sop_date, docker_client, navitia_docker_compose_file_path, navitia_docker_compose_file_name, navitia_docker_compose_default_file_name):
    """
    Validate that the new default coverage returns heat-map results for the current
    start-of-service date, and that the secondary coverage (coverage_name) still answers.

    Each check restarts Navitia with only the coverage under test (containers are
    stopped in between), then queries the time-map server's heat map for the date.

    :param coverage_name: the secondary coverage that should carry the previous graph
    :param default_coverage_name: the coverage that gets the new start-of-production date
    :param default_cov_prev_sop_date: previous start-of-production date of the default
        coverage (NOTE(review): not referenced in this body — confirm it is still needed)
    :param docker_client: docker client used to stop/start containers
    :param navitia_docker_compose_file_path: folder holding the docker-compose files
    :param navitia_docker_compose_file_name: compose file for the secondary coverage
    :param navitia_docker_compose_default_file_name: compose file for the default coverage
    :return: whether the graph changes were applied
    """
    current_start_service_date = dt.strptime(process_date.get_date_now(), "%Y%m%d")
    # Pick the time-map server URL by deployment target
    if cfg.ttm_server_on == "aws_ec2":
        time_map_server_url = cfg.time_map_server_aws_url
    else:
        time_map_server_url = cfg.time_map_server_local_url

    # Check that the current default coverage is up-to-date: restart with only the
    # default coverage, then ask the heat map for the current service date
    stop_all_containers(docker_client)
    start_navitia_with_single_coverage(navitia_docker_compose_file_path,
                                       navitia_docker_compose_default_file_name, default_coverage_name, False)
    if not check_prod_date_is_valid_using_heat_map(time_map_server_url, default_coverage_name,
                                                   current_start_service_date.strftime("%Y%m%d")):
        _log.error("The %s coverage seems not to be up-to-date following update attempts.", default_coverage_name)
        return False
    else:
        _log.info("%s coverage is up-to-date with production date %s", default_coverage_name,
                  current_start_service_date.strftime("%Y%m%d"))

    # Check that the coverage_name (the previous one) is up-to-date by comparing sop dates
    stop_all_containers(docker_client)
    is_up = start_navitia_with_single_coverage(navitia_docker_compose_file_path,
                                               navitia_docker_compose_file_name, coverage_name, False)
    if not is_up:
        # NOTE(review): execution continues even when the coverage failed to come up —
        # the checks below will then report the failure; confirm this is intended
        _log.error("The %s coverage seems not to be up", coverage_name)
    cov_sop_date = get_coverage_start_production_date(coverage_name)
    if cov_sop_date == "":
        # First-run case: there is no previous graph to validate yet
        _log.info("If this is the first time you're running Transit Analyst Israel data processing, you need to "
                  "copy the generated default.nav.lz4 graph to secondary-cov.nav.lz4 - See docs.")
        return True
    if not check_prod_date_is_valid_using_heat_map(time_map_server_url, coverage_name,
                                                   current_start_service_date.strftime("%Y%m%d")):
        _log.error("The %s coverage seems not to be up-to-date following update attempts.\nA call for heat map data with"
                   " %s date returned no results", coverage_name, current_start_service_date.strftime("%Y%m%d"))
        return False
    _log.info("%s coverage is now updated with new start-of-production date %s. "
              "Can be accessed via %s%s", coverage_name, current_start_service_date.strftime("%Y%m%d"),
              time_map_server_url, coverage_name)
    return True
def check_coverage_running(url, coverage_name):
    """
    Check whether a Navitia coverage is up and running.

    :param url: Navitia server coverage url
    :param coverage_name: the name of the coverage to check
    :return: whether the Navitia coverage is up and running
    """
    _log.info("checking if %s is up", coverage_name)
    response = requests.get(url)

    # The status endpoint answers with Json; a healthy coverage reports a region
    # whose status contains "running"
    json_data = json.loads(response.text)
    coverage_is_up = "regions" in json_data and "running" in json_data["regions"][0]['status']
    if coverage_is_up:
        _log.info("%s coverage is up", coverage_name)
    else:
        _log.info("%s coverage is down", coverage_name)
    return coverage_is_up
def validate_osm_gtfs_convertion_to_graph_is_completed(worker_con, time_to_wait, start_processing_time):
    """
    Validate that the Navitia worker tasks osm2ed, gtfs2ed and ed2nav completed successfully.

    :param worker_con: the Navitia worker container
    :param time_to_wait: minutes to wait before inspecting the worker log
    :param start_processing_time: datetime the processing started; only success lines
        logged after this time are accepted
    :return: whether the conversion completed
    """
    # Wait for the conversions to (hopefully) finish
    _log.info("Waiting %s minutes to let OSM & GTFS conversions to lz4 graph takes place", time_to_wait)
    time.sleep(time_to_wait * 60)
    _log.info("I'm back! Verifying that the conversions took place")

    # A success line looks like:
    #   Task tyr.binarisation.ed2nav[feac06ca-51f7-4e39-bf1d-9541eaac0988] succeeded
    # ed2nav success implies the earlier gtfs2ed/osm2ed tasks also succeeded.
    # Compile the patterns once instead of rebuilding them per log line
    success_pattern = re.compile(r'tyr\.binarisation\.ed2nav\[\S*\] succeeded')
    timestamp_pattern = re.compile(r'\d{1,4}-\d{1,2}-\d{1,2}\b \d{1,2}:\d{1,2}:\d{1,2}')

    # Dump the container log to a temp file, then scan it backwards (latest lines first)
    tyr_worker_outputname = "tyr_worker_output.txt"
    with open(tyr_worker_outputname, "w", encoding="UTF-8") as tyr_worker_output:
        tyr_worker_output.write(worker_con.logs().decode('utf-8'))
    ed2nav_completed = False
    with open(tyr_worker_outputname, "r", encoding="UTF-8") as tyr_worker_output:
        lines = tyr_worker_output.readlines()
        for line in reversed(lines):
            if success_pattern.search(line):
                time_of_line = timestamp_pattern.findall(line)
                time_of_line = dt.strptime(time_of_line[0], '%Y-%m-%d %H:%M:%S')
                # Only count successes logged after our processing started
                if start_processing_time < time_of_line:
                    ed2nav_completed = True
                    break
    os.remove(tyr_worker_outputname)
    if ed2nav_completed:
        _log.info("OSM conversion task ed2nav, GTFS conversion task gtfs2ed and ed2nav are successful")
        return True
    else:
        _log.error("After %s minutes - tasks aren't completed", time_to_wait)
        return False
import set_next_month_invocation import os import process_date update_time = datetime.datetime.now().strftime("%d/%m/%Y %H:%M") try: if cfg.get_service_date == 'auto': next_month_operation_date = process_date.get_auto_date_nextmonth( ) # The date that the product should already be working next_month_operation_date = datetime.datetime.strptime( next_month_operation_date, '%Y%m%d') next_month_operation_date = next_month_operation_date + datetime.timedelta( hours=23) + datetime.timedelta(minutes=45) _log.info("Setting the next data update date to be %s local time.", next_month_operation_date) set_next_month_invocation.set_next_invocation_date( os.path.basename(__file__)) if cfg.get_service_date == 'auto': #get gtfs files and osm file _log.info("Download OSM & GTFS") import gtfs_osm_download # Stop docker running to release memory for processing docker_client = utils.get_docker_service_client() containers = docker_client.containers.list(filters={"name": "worker"}) if len(containers) > 0: utils.stop_all_containers(utils.get_docker_service_client()) """ # unzip gtfs file
import utils import traceback from logger import _log import set_next_month_invocation import os import process_date update_time = datetime.datetime.now().strftime("%d/%m/%Y %H:%M") try: if cfg.get_service_date == 'auto': next_month_operation_date = process_date.get_auto_date_nextmonth() # The date that the product should already be working next_month_operation_date = datetime.datetime.strptime(next_month_operation_date, '%Y%m%d') next_month_operation_date = next_month_operation_date + datetime.timedelta(hours=23) + datetime.timedelta(minutes=45) _log.info("Setting the next data update date to be %s local time.", next_month_operation_date) set_next_month_invocation.set_next_invocation_date(os.path.basename(__file__)) if cfg.get_service_date == 'auto': #get gtfs files and osm file _log.info("Download OSM & GTFS") import gtfs_osm_download # Stop docker running to release memory for processing docker_client = utils.get_docker_service_client() containers = docker_client.containers.list(filters={"name": "worker"}) if len(containers) > 0: utils.stop_all_containers(utils.get_docker_service_client()) # unzip gtfs file import gtfs_unzip
def get_gtfs_file_from_url_ftp(url, file_name_on_server, _log):
    """
    Download a GTFS file from an FTP server into <parent-of-cwd>/<cfg.gtfspath>, retrying hourly.

    :param url: the FTP server URL that points to file's containing folder
    :param file_name_on_server: the file name on the FTP server
    :param _log: logger to report progress and errors to
    :return: file name of the downloaded content, e.g. "<gtfsdirbase><date>.zip"
    :raises Exception: after max_download_attemtps failed attempts (~24 hours)
    """
    _log.info("Going to download the latest GTFS from %s ", url)
    download_complete = False
    download_attempts = 1
    max_download_attemtps = 24
    while not download_complete:
        # After a failed attempt, sleep an hour before retrying
        # (bounds now use max_download_attemtps instead of a hard-coded 24)
        if not download_complete and max_download_attemtps > download_attempts > 1:
            _log.error(
                "%s is unreachable. Sleeping for 60 minutes and trying again. This is attempt %s out of "
                "%s attempts", url, download_attempts, max_download_attemtps)
            time.sleep(60 * 60)
        if not download_complete and download_attempts > max_download_attemtps:
            _log.error(
                "%s is unreachable for more than 24 hours. Aborting update", url)
            raise Exception
        download_attempts += 1
        try:
            # Connect to FTP anonymously
            ftp = ftplib.FTP(url)
            ftp.login()

            # Work out the dated local file name ("israel20190225.zip") and the
            # remote size by scanning the server's directory listing
            file_lines = []
            size = 0
            local_file_name = cfg.gtfsdirbase
            processdate = process_date.get_date_now()
            ftp.dir("", file_lines.append)
            for line in file_lines:
                tokens = line.split(maxsplit=4)
                name = tokens[3]
                if name == file_name_on_server:
                    time_str = tokens[0]
                    actual_time = parser.parse(time_str)
                    local_file_name = local_file_name + processdate + ".zip"
                    size = float(tokens[2])
            pardir = Path(os.getcwd()).parent
            local_file_path_and_name = pardir / cfg.gtfspath / local_file_name

            # Generate a progress bar and download
            local_file = open(local_file_path_and_name, 'wb')
            pbar = createProgressBar(size)

            # Download (size_iterator is module state shared with the write callback)
            global size_iterator
            size_iterator = 0
            ftp.retrbinary("RETR " + file_name_on_server,
                           lambda data: file_write_update_progress_bar(data, local_file, pbar))

            # Finish
            local_file.close()
            ftp.quit()
            pbar.finish()
            sys.stdout.flush()
            download_complete = True
            _log.info("Finished loading latest GTFS to: %s", local_file_path_and_name)
            return local_file_name
        except ftplib.all_errors as err:
            error_code = err.args[0]
            # File not found on the server — no point retrying
            if error_code == 2:
                # BUG FIX: the original passed file_name_on_server as the logging format
                # string ( _log.error(file_name_on_server, "is not found on %s", url) ),
                # which logged garbage; the format string must come first
                _log.error("%s is not found on %s", file_name_on_server, url)
                raise err
            # Invalid URL (error 11001) — retry after the sleep at the top of the loop
            if error_code == 11001:
                _log.error("URL %s is not valid", url)
                continue
def validate_osm_gtfs_convertion_to_graph_is_running(docker_client, coverage_name, navitia_docker_compose_file_path, navitia_docker_compose_file_name):
    """
    Validate that the conversion of GTFS & OSM to a Navitia graph is undergoing
    (a continuous process).

    The tyr_beat container is the service that triggers the conversion in the worker
    container after new files are copied into /srv/ed/input/<coverage-name>. If tyr_beat
    is down and cannot be re-started, its container and image are removed and
    re-downloaded from the web by re-running docker-compose.

    :param docker_client: the docker client
    :param coverage_name: the coverage to restart Navitia with if tyr_beat must be re-pulled
    :param navitia_docker_compose_file_path: folder holding the docker-compose file
    :param navitia_docker_compose_file_name: name of the docker-compose file to run
    """
    # tyr_beat must be running as it manages the tasks for the worker, the latter generates the graph
    _log.info("Validating that tyr_beat is up and running")
    beat_con = docker_client.containers.list(filters={"name": "beat"})
    time_beat_restarted = ""
    if not beat_con:
        # tyr_beat is down — try to re-run it via docker-compose, capturing its output
        _log.info("tyr_beat is down, attempting to re-run")
        tyr_beat_start_command = "docker-compose -f" + navitia_docker_compose_file_name + " -p navitia-docker-compose up tyr_beat"
        time_beat_restarted = dt.utcnow()
        with open("tyr_beat_output.txt", "w", encoding="UTF-8") as tyr_beat_output:
            subprocess.Popen(tyr_beat_start_command, cwd=navitia_docker_compose_file_path, shell=True,
                             stdout=tyr_beat_output, stderr=tyr_beat_output)
            # Wait 15 seconds for it to come up
            _log.info("Waiting 15 seconds to see if tyr_beat is up")
            time.sleep(15)
            tyr_beat_output.close()  # NOTE(review): redundant — the `with` block closes the file
        # Check that tyr_beat is working using its log, comparing the restart time
        # with the timestamp of its periodic task line
        # (the "udpate" typo below matches the actual tyr_beat log output — do not "fix" it)
        new_time_is_found = False
        with open("tyr_beat_output.txt", "r", encoding="UTF-8") as tyr_beat_output:
            lines = tyr_beat_output.readlines()
            for line in reversed(lines):
                if "Sending due task udpate-data-every-30-seconds" in line:
                    time_of_line = re.findall(r'\d{1,4}-\d{1,2}-\d{1,2}\b \d{1,2}:\d{1,2}:\d{1,2}', line)
                    time_of_line = dt.strptime(time_of_line[0], '%Y-%m-%d %H:%M:%S')
                    if time_beat_restarted < time_of_line:
                        _log.info("tyr_beat is up and running")
                        new_time_is_found = True
                        break
        # tyr_beat is malfunctioning — delete its container and image, then re-download
        if not new_time_is_found:
            # stop all containers
            _log.info("Stopping and removing all containers to pull fresh copy of tyr_beat container")
            stop_all_containers(docker_client)
            # delete container and image
            beat_con = docker_client.containers.list(all=True, filters={"name": "beat"})[0]
            beat_image = docker_client.images.list(name="navitia/tyr-beat")[0]
            beat_con_name = beat_con.name
            beat_image_id = beat_image.id
            beat_con.remove()
            _log.info("%s container is removed", beat_con_name)
            docker_client.images.remove(beat_image.id)
            _log.info("%s image is removed", beat_image_id)
            # re-run navitia docker-compose which re-downloads the tyr_beat container
            _log.info("Restarting docker with %s coverage", coverage_name)
            start_navitia_with_single_coverage(navitia_docker_compose_file_path,
                                               navitia_docker_compose_file_name, coverage_name, True)
        # removing the log file
        os.remove("tyr_beat_output.txt")
    else:
        _log.info("Validated tyr_beat is up and running")