class DelaysProvider:
    def __init__(self, data_path=FloydDataPaths):
        self.delays_producer = RmqProducer(EXCHANGES.DELAYS_PROVIDER.value)
        self.downloader = Downloader()
        self.parser = VehiclePositionsParser()
        self.merger = VehiclePositionsMerger()
        self.extractor = DelaysExtractor()
        self.data_path = data_path

    def start(self):
        self.delays_producer.start()
        logger.info("DelaysProvider: has started.")
        while True:
            try:
                vehicle_positions_pb_T, vehicle_positions_pb_A = self.downloader.download_vehicle_positions()
                vehicle_positions_df_T = self.parser.parse(vehicle_positions_pb_T)
                vehicle_positions_df_A = self.parser.parse(vehicle_positions_pb_A)
                services_id_offset = len(load_property_from_config_json("services")[0])
                vehicle_positions_df = self.merger.merge(
                    vehicle_positions_df_T, vehicle_positions_df_A, services_id_offset)
                stop_times_df = pd.read_pickle(self.data_path.stops_times_df.value)
                delays_df = self.extractor.extract(vehicle_positions_df, stop_times_df)
                delays_df.to_pickle(self.data_path.delays_df.value)
                self.delays_producer.send_msg(MESSAGES.DELAYS_UPDATED.value,
                                              lost_stream_msg="Solvers are down.")
                logger.info("DelaysProvider: delays updated")
                time.sleep(60)
            except socket.gaierror:
                logger.warning("DelaysProvider: can't download data: internet connection lost.")
                time.sleep(60)
            except TimeoutError:
                logger.warning("DelaysProvider: connection timeout while trying to download data.")
                time.sleep(60)
            except (TypeError, FileNotFoundError):
                logger.warning("DelaysProvider: some pickles in the data directory are missing; "
                               "this service won't work without them. "
                               "Wait for DataProvider to finish processing GTFS files.")
                time.sleep(60)


class RmqSequenceSolver:
    def __init__(self):
        self.sequence_solver = SequenceSolver()
        self.query_consumer = RmqConsumer(EXCHANGES.SEQUENCE_QUERY.value, self.consume_query)
        self.results_producer = RmqProducer(EXCHANGES.FLASK_SERVER_SEQUENCE.value)

    def start(self):
        self.sequence_solver.start()
        self.results_producer.start()
        self.query_consumer.start()

    def stop(self):
        self.query_consumer.stop()
        self.results_producer.stop()
        self.sequence_solver.data_manager.stop()

    def consume_query(self, query: SequenceQuery):
        try:
            sequence = self.sequence_solver.find_best_sequence(query)
            self.results_producer.send_msg(sequence)
        except Exception as e:
            logger.error(f"SequenceSolver({id(self.sequence_solver)}): "
                         f"error while searching for the best sequence: {e}")
            self.results_producer.send_msg(
                SequenceResults(query.query_id, ErrorCodes.INTERNAL_SERVER_ERROR.value, []))


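# A minimal sketch of running this wrapper as a standalone worker. Only the
# start() and stop() methods defined above are used; the __main__ guard and
# the shutdown-on-Ctrl+C policy are assumptions, not part of the original code.
if __name__ == "__main__":
    solver = RmqSequenceSolver()
    try:
        # start() ends in query_consumer.start(), which blocks on the RabbitMQ
        # consume loop (elsewhere in this codebase it is run on a separate Thread).
        solver.start()
    except KeyboardInterrupt:
        solver.stop()

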
class RmqConnectionSolver:
    def __init__(self):
        self.connection_solver = ConnectionSolver()
        self.query_consumer = RmqConsumer(EXCHANGES.CONNECTION_QUERY.value, self.consume_query)
        self.results_producer = RmqProducer(EXCHANGES.FLASK_SERVER_CONNECTION.value)

    def start(self):
        self.connection_solver.start()
        self.results_producer.start()
        self.query_consumer.start()

    def stop(self):
        self.query_consumer.stop()
        self.results_producer.stop()
        self.connection_solver.data_manager.stop()

    def consume_query(self, query: ConnectionQuery):
        try:
            connection_results = self.connection_solver.find_connections(query)
            self.results_producer.send_msg(connection_results)
        except Exception as e:
            # Keep the consumer loop alive on solver errors, mirroring RmqSequenceSolver.
            logger.error(f"ConnectionSolver({id(self.connection_solver)}): "
                         f"error while searching for connections: {e}")


class RequestBroker:
    app = None

    def __init__(self, name):
        self.app = Flask(name)
        self.app.config['JSON_AS_ASCII'] = False
        self.query_id = Value('i', 0)
        self.cache = CacheDict(cache_len=1000)
        self.connection_producer = RmqProducer(EXCHANGES.CONNECTION_QUERY.value)
        self.meeting_producer = RmqProducer(EXCHANGES.MEETING_QUERY.value)
        self.sequence_producer = RmqProducer(EXCHANGES.SEQUENCE_QUERY.value)
        self.connection_consumer = RmqConsumer(EXCHANGES.FLASK_SERVER_CONNECTION.value,
                                               self.consume_rabbit_results)
        self.meeting_consumer = RmqConsumer(EXCHANGES.FLASK_SERVER_MEETING.value,
                                            self.consume_rabbit_results)
        self.sequence_consumer = RmqConsumer(EXCHANGES.FLASK_SERVER_SEQUENCE.value,
                                             self.consume_rabbit_results)
        self.connection_consumer_thread = Thread(target=self.connection_consumer.start)
        self.meeting_consumer_thread = Thread(target=self.meeting_consumer.start)
        self.sequence_consumer_thread = Thread(target=self.sequence_consumer.start)
        self.data_manager = FlaskDataManager()
        self.stops = None
        self.last_update_date = None
        self.limiter = Limiter(self.app, key_func=get_remote_address,
                               default_limits=["100 per hour"])
        self.limiter.exempt(self.handle_get_query)
        self.limiter.exempt(self.handle_get_stops)

    def update_data(self):
        data = self.data_manager.get_updated_data()
        self.stops = data['stops']
        self.last_update_date = self.data_manager.last_data_update

    def run(self):
        self.app.run(threaded=True, host='0.0.0.0', port=5000)

    def add_endpoint(self, endpoint=None, endpoint_name=None, handler=None, methods=None):
        self.app.add_url_rule(endpoint, endpoint_name, handler, methods=methods)

    def start(self):
        self.data_manager.start()
        if self.data_is_loaded():
            self.update_data()
        self.connection_producer.start()
        self.meeting_producer.start()
        self.sequence_producer.start()
        self.connection_consumer_thread.start()
        self.meeting_consumer_thread.start()
        self.sequence_consumer_thread.start()
        self.add_endpoint('/connection', 'connection', self.handle_post_connection, ['POST'])
        self.add_endpoint('/meeting', 'meeting', self.handle_post_meeting, ['POST'])
        self.add_endpoint('/sequence', 'sequence', self.handle_post_sequence, ['POST'])
        self.add_endpoint('/result/<query_id>', 'results', self.handle_get_query, ['GET'])
        self.add_endpoint('/stops', 'stops', self.handle_get_stops, ['GET'])
        logger.info('RequestBroker: started')
        self.run()

    def consume_rabbit_results(self, result):
        result["result"]["is_done"] = True
        self.cache[result["query_id"]] = {"result": result["result"], "error": result["error"]}

    def handle_get_query(self, query_id):
        try:
            query_id = int(query_id)
        except ValueError:
            return jsonify(ErrorCodes.BAD_QUERY_ID_TYPE.value), 400
        try:
            result = self.cache[query_id]
        except KeyError:
            return jsonify(ErrorCodes.BAD_QUERY_ID_VALUE.value), 400
        if result["result"]["is_done"] and result["error"] != ErrorCodes.OK.value:
            if result["error"] in [ErrorCodes.INTERNAL_SERVER_ERROR.value,
                                   ErrorCodes.INTERNAL_DATA_NOT_LOADED.value]:
                return jsonify(result["error"]), 500
            return jsonify(result["error"]), 400
        return jsonify(result["result"]), 202

    def handle_get_stops(self):
        if not self.data_is_loaded():
            return jsonify(ErrorCodes.INTERNAL_DATA_NOT_LOADED.value), 500
        if self.last_update_date is None or self.last_update_date < self.data_manager.last_data_update:
            self.update_data()
        return jsonify(self.stops), 202

    def handle_post_connection(self):
        request_json = request.get_json()
        return self.handle_query_post(self.connection_producer, request_json,
                                      ConnectionQuery, ErrorCodes.BAD_CONNECTION_JSON_FORMAT.value)

    def handle_post_meeting(self):
        request_json = request.get_json()
        return self.handle_query_post(self.meeting_producer, request_json,
                                      MeetingQuery, ErrorCodes.BAD_MEETING_JSON_FORMAT.value)

    def handle_post_sequence(self):
        request_json = request.get_json()
        return self.handle_query_post(self.sequence_producer, request_json,
                                      SequenceQuery, ErrorCodes.BAD_SEQUENCE_JSON_FORMAT.value)

    def handle_query_post(self, producer, request_json, query_class, parsing_error_message):
        if not query_class.validate(request_json):
            return jsonify(parsing_error_message), 400
        with self.query_id.get_lock():
            self.query_id.value += 1
            query_id = self.query_id.value
        request_json["query_id"] = query_id
        producer.send_msg(request_json)
        self.cache[query_id] = {"result": {"is_done": False}}
        return jsonify({"query_id": query_id}), 202

    def data_is_loaded(self):
        if self.data_manager.data_loaded:
            return True
        logger.warning("RequestBroker: some pickles in the data directory are missing; "
                       "this service won't work without them. "
                       "Wait for DataProvider to finish processing GTFS files.")
        return False


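# A client-side sketch of the query lifecycle exposed above: POST a query,
# receive a query_id (202), then poll /result/<query_id> until "is_done" flips.
# The host/port match RequestBroker.run(); the payload fields are purely
# hypothetical, since ConnectionQuery.validate() is defined elsewhere.
import time
import requests

BASE_URL = "http://localhost:5000"

# Hypothetical payload; the real field names depend on ConnectionQuery.
payload = {"start_stop_name": "A", "end_stop_name": "B"}

resp = requests.post(f"{BASE_URL}/connection", json=payload)
resp.raise_for_status()          # 400 here means validate() rejected the payload
query_id = resp.json()["query_id"]

# handle_get_query returns 202 with the cached result; "is_done" is set to
# True by consume_rabbit_results once a solver publishes its answer.
while True:
    resp = requests.get(f"{BASE_URL}/result/{query_id}")
    resp.raise_for_status()      # 400/500 carry an ErrorCodes value instead
    result = resp.json()
    if result.get("is_done"):
        break
    time.sleep(1)
print(result)

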
class DataProvider:
    def __init__(self, data_path=FloydDataPaths, configuration=DEFAULT_EXTRACTOR_CONFIGURATION):
        self.floyd_data_producer = RmqProducer(EXCHANGES.DATA_PROVIDER.value)
        self.downloader = Downloader()
        self.parser = Parser()
        self.merger = Merger()
        self.corrector = Corrector()
        self.extractor = Extractor(configuration)
        self.data_path = data_path
        self.alive = False

    def start(self):
        self.floyd_data_producer.start()
        self.alive = True
        logger.info("DataProvider: has started.")
        while self.alive:
            try:
                new_update_date = self.downloader.get_last_update_time()
                last_update_date = self.load_update_date()
                if last_update_date is None or new_update_date > last_update_date:
                    self.process_data()
                    save_property_to_config_json(
                        "update_date", new_update_date.strftime("%Y-%m-%d %H:%M:%S"))
                    self.floyd_data_producer.send_msg(MESSAGES.DATA_UPDATED.value,
                                                      lost_stream_msg="Solvers are down.")
                time.sleep(60)
            except socket.gaierror:
                logger.warning("DataProvider: can't download data: internet connection lost.")
                time.sleep(60)
            except TimeoutError:
                logger.warning("DataProvider: connection timeout while trying to download data.")
                time.sleep(60)

    def stop(self):
        self.floyd_data_producer.stop()
        self.alive = False

    @staticmethod
    def load_update_date():
        last_update = load_property_from_config_json("update_date")
        if last_update is None:
            return None
        return datetime.strptime(last_update, DATETIME_FORMAT)

    def process_data(self):
        logger.info("DataProvider: updating data")
        gtfs_zip_T, gtfs_zip_A = self.downloader.download_gtfs_static_data()
        parsed_data_T = self.parser.parse(gtfs_zip_T)
        parsed_data_A = self.parser.parse(gtfs_zip_A)
        logger.info("DataProvider: data parsed")
        merged_data, service_id_offset = self.merger.merge(parsed_data_T, parsed_data_A)
        logger.info("DataProvider: data merged")
        corrected_data = self.corrector.correct(merged_data)
        logger.info("DataProvider: data corrected")
        save_property_to_config_json("services", [
            list(parsed_data_T.calendar_df.index),
            list(parsed_data_A.calendar_df.index + service_id_offset)
        ])
        extracted_data = self.extractor.extract(corrected_data)
        logger.info("DataProvider: data extracted")
        extracted_data.save(self.data_path)
        logger.info("DataProvider: data saved")
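

# A sketch of wiring the two providers together: both start() methods loop
# forever, so a plain launcher gives each its own thread. DataProvider and
# DelaysProvider come from this section; the threading layout is an assumption.
from threading import Thread

if __name__ == "__main__":
    data_provider = DataProvider()
    delays_provider = DelaysProvider()
    data_thread = Thread(target=data_provider.start)
    delays_thread = Thread(target=delays_provider.start)
    data_thread.start()
    delays_thread.start()
    data_thread.join()
    delays_thread.join()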