def download_merged_data() -> ParsedData:
    """Download both GTFS static feeds (the T and A zips) and merge them into one ParsedData."""
    downloader = Downloader()
    gtfs_zip_T, gtfs_zip_A = downloader.download_gtfs_static_data()
    parser = Parser()
    parsed_data_A = parser.parse(gtfs_zip_A)
    parsed_data_T = parser.parse(gtfs_zip_T)
    merger = Merger()
    merged_data, _ = merger.merge(parsed_data_A, parsed_data_T)  # the service-id offset is not needed here
    return merged_data
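# A minimal usage sketch; calendar_df is the attribute DataProvider.process_data
# reads on parsed feeds, and is assumed here to survive the merge:
merged = download_merged_data()
print(merged.calendar_df.head())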
import pickle
from math import cos, radians
from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx

from krk_meetings.data_provider.gtfs_static.Corrector import Corrector
from krk_meetings.data_provider.gtfs_static.Merger import Merger
from krk_meetings.data_provider.gtfs_static.Parser import Parser

if __name__ == '__main__':
    data_dir = Path(__file__).parent.parent / 'data_provider' / 'data'
    parser = Parser()
    parsed_data_A = parser.parse(data_dir / 'GTFS_KRK_A.zip')
    parsed_data_T = parser.parse(data_dir / 'GTFS_KRK_T.zip')
    merger = Merger()
    merged_data, _ = merger.merge(parsed_data_A, parsed_data_T)
    corrector = Corrector()
    corrected_data = corrector.correct(merged_data)
    with open(data_dir / 'border.pickle', 'rb') as f:
        border = pickle.load(f)
    fig, ax = plt.subplots(figsize=(10, 7))
    fig.subplots_adjust(left=-0.06, right=1.06, top=1.02, bottom=-0.03)
    # Compensate for longitude compression at Krakow's latitude (~50 deg N)
    # so the lon/lat plot keeps true proportions.
    ax.set_aspect(1 / cos(radians(50)))
from krk_meetings.data_provider.Downloader import Downloader
from krk_meetings.data_provider.gtfs_static.Corrector import Corrector
from krk_meetings.data_provider.gtfs_static.Merger import Merger
from krk_meetings.data_provider.gtfs_static.Parser import Parser

if __name__ == '__main__':
    downloader = Downloader()
    gtfs_zip_T, gtfs_zip_A = downloader.download_gtfs_static_data()
    parser = Parser()
    parsed_data_A = parser.parse(gtfs_zip_A)
    parsed_data_T = parser.parse(gtfs_zip_T)
    # selector = Selector()
    # selected_data_A = selector.select(parsed_data_A, service_id=1)
    # selected_data_T = selector.select(parsed_data_T, service_id=1)
    merger = Merger()
    merged_data, service_id_offset = merger.merge(parsed_data_A, parsed_data_T)
    corrector = Corrector()
    corrected_data = corrector.correct(merged_data)
class DataProvider:
    def __init__(self, data_path=FloydDataPaths, configuration=DEFAULT_EXTRACTOR_CONFIGURATION):
        self.floyd_data_producer = RmqProducer(EXCHANGES.DATA_PROVIDER.value)
        self.downloader = Downloader()
        self.parser = Parser()
        self.merger = Merger()
        self.corrector = Corrector()
        self.extractor = Extractor(configuration)
        self.data_path = data_path
        self.alive = False

    def start(self):
        self.floyd_data_producer.start()
        self.alive = True
        logger.info("DataProvider: has started.")
        # Poll for a newer GTFS feed once a minute and rebuild the data when one appears.
        while self.alive:
            try:
                new_update_date = self.downloader.get_last_update_time()
                last_update_date = self.load_update_date()
                if last_update_date is None or new_update_date > last_update_date:
                    self.process_data()
                    save_property_to_config_json("update_date", new_update_date.strftime("%Y-%m-%d %H:%M:%S"))
                    self.floyd_data_producer.send_msg(MESSAGES.DATA_UPDATED.value, lost_stream_msg="Solvers are down.")
                time.sleep(60)
            except socket.gaierror:
                logger.warning("DataProvider: Can't download data: Internet connection lost.")
                time.sleep(60)
            except TimeoutError:
                logger.warning("DataProvider: Connection timeout while trying to download data.")
                time.sleep(60)

    def stop(self):
        self.floyd_data_producer.stop()
        self.alive = False

    @staticmethod
    def load_update_date():
        last_update = load_property_from_config_json("update_date")
        if last_update is None:
            return None
        return datetime.strptime(last_update, DATETIME_FORMAT)

    def process_data(self):
        logger.info("DataProvider: updating data")
        gtfs_zip_T, gtfs_zip_A = self.downloader.download_gtfs_static_data()
        parsed_data_T = self.parser.parse(gtfs_zip_T)
        parsed_data_A = self.parser.parse(gtfs_zip_A)
        logger.info("DataProvider: data parsed")
        merged_data, service_id_offset = self.merger.merge(parsed_data_T, parsed_data_A)
        logger.info("DataProvider: data merged")
        corrected_data = self.corrector.correct(merged_data)
        logger.info("DataProvider: data corrected")
        # Service ids of feed A are shifted by service_id_offset so they remain
        # unique after the merge with feed T.
        save_property_to_config_json("services", [
            list(parsed_data_T.calendar_df.index),
            list(parsed_data_A.calendar_df.index + service_id_offset),
        ])
        extracted_data = self.extractor.extract(corrected_data)
        logger.info("DataProvider: data extracted")
        extracted_data.save(self.data_path)
        logger.info("DataProvider: data saved")
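# A minimal sketch of running the provider in the background; it assumes the
# RabbitMQ exchange behind EXCHANGES.DATA_PROVIDER is reachable, and the
# threading setup is illustrative rather than part of the project:
import threading

provider = DataProvider()
worker = threading.Thread(target=provider.start, daemon=True)
worker.start()
# ... later; note the polling loop may finish its current 60 s sleep before exiting.
provider.stop()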
def parsed_data() -> ParsedData:
    parser = Parser()
    return parser.parse(gtfs_zip_path)
def test_parse_arg_io():
    # Parser.parse should accept an open binary file object.
    parser = Parser()
    with open(gtfs_zip_path, 'rb') as f:
        parser.parse(f)
def test_parse_arg_path():
    # Parser.parse should accept a pathlib.Path.
    parser = Parser()
    parser.parse(gtfs_zip_path)
def test_parse_arg_str():
    # Parser.parse should accept a plain string path.
    parser = Parser()
    parser.parse(str(gtfs_zip_path))
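# The tests above rely on a module-level gtfs_zip_path that is not shown in
# these excerpts; a plausible stand-in, borrowing the feed file name from the
# plotting script (the exact location is an assumption):
from pathlib import Path

gtfs_zip_path = Path(__file__).parent / 'data' / 'GTFS_KRK_A.zip'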