def test_tfl_data():
    """Return whatever ``get_path`` resolves for the 'test_tfl_format' key."""
    tfl_path = get_path('test_tfl_format')
    return tfl_path
def test_data():
    """Return whatever ``get_path`` resolves for the 'test_data_dir' key."""
    data_path = get_path('test_data_dir')
    return data_path
def convert(data_dir, output_filepath, append_to_existing=False):
    """
    Converts TransXchange formatted schedule data into GTFS feed.

    Parameters
    ----------
    data_dir : str
        Data directory containing one or multiple TransXchange .xml files.
        Only files matching ``*.xml`` directly inside this directory are
        picked up (no recursion).
    output_filepath : str
        Full filepath to the output GTFS zip-file,
        e.g. '/home/myuser/data/my_gtfs.zip'
    append_to_existing : bool (default is False)
        Flag for appending to existing gtfs-database. This might be useful
        if you have TransXchange .xml files distributed into multiple
        directories (e.g. separate files for train data, tube data and
        bus data) and you want to merge all those datasets into a single
        GTFS feed.

    Notes
    -----
    A temporary database file named ``gtfs.db`` is used in the same
    directory as ``output_filepath``. Unless ``append_to_existing`` is
    truthy, any existing ``gtfs.db`` in that directory is deleted first.
    """
    # Total start
    tot_start_t = timeit()

    # Filepath for temporary gtfs db
    target_dir = os.path.dirname(output_filepath)
    gtfs_db = os.path.join(target_dir, "gtfs.db")

    # If append to database is false remove previous gtfs-database if it exists
    if not append_to_existing and os.path.exists(gtfs_db):
        os.remove(gtfs_db)

    # NAPTAN stops
    naptan_stops_fp = get_path("naptan_stops")

    # Retrieve all TransXChange files
    files = glob.glob(os.path.join(data_dir, "*.xml"))

    # Iterate over files
    print("Populating database ..")

    # Limit the processed files by file size (in MB)
    # Files with lower filesize than below will be processed
    file_size_limit = 1000

    # Create workers
    workers = create_workers(input_files=files,
                             file_size_limit=file_size_limit,
                             stops_fp=naptan_stops_fp,
                             gtfs_db=gtfs_db)

    # Create Pool
    pool = multiprocessing.Pool()
    try:
        # Generate GTFS info to the database in parallel
        pool.map(process_files, workers)
    finally:
        # Always release the worker processes, even if a worker raised,
        # so repeated calls do not accumulate idle child processes.
        pool.close()
        pool.join()

    # Print information about the total time
    # (measured before the export step below, so export time is excluded)
    tot_end_t = timeit()
    tot_duration = (tot_end_t - tot_start_t) / 60
    print("===========================================================")
    print("It took %s minutes in total." % round(tot_duration, 1))

    # Generate output dictionary
    gtfs_data = generate_gtfs_export(gtfs_db)

    # Export to disk
    save_to_gtfs_zip(output_zip_fp=output_filepath, gtfs_data=gtfs_data)
def test_txc21_data():
    """Return whatever ``get_path`` resolves for the 'test_txc21_format' key."""
    txc21_path = get_path('test_txc21_format')
    return txc21_path
def unpacked_data():
    """Return whatever ``get_path`` resolves for the 'test_data_dir' key."""
    unpacked_path = get_path('test_data_dir')
    return unpacked_path
def dir_with_packed_data():
    """Return whatever ``get_path`` resolves for the 'test_dir_with_packed_data' key."""
    packed_dir_path = get_path('test_dir_with_packed_data')
    return packed_dir_path
def nested_data():
    """Return whatever ``get_path`` resolves for the 'test_nested_packed_data' key."""
    nested_path = get_path('test_nested_packed_data')
    return nested_path
def packed_data():
    """Return whatever ``get_path`` resolves for the 'test_packed_data' key."""
    packed_path = get_path('test_packed_data')
    return packed_path
def test_naptan_data():
    """Return whatever ``get_path`` resolves for the 'naptan_stops' key."""
    naptan_path = get_path('naptan_stops')
    return naptan_path