Example #1
    def __init__(self, data_path=FloydDataPaths):
        self.delays_producer = RmqProducer(EXCHANGES.DELAYS_PROVIDER.value)
        self.downloader = Downloader()
        self.parser = VehiclePositionsParser()
        self.merger = VehiclePositionsMerger()
        self.extractor = DelaysExtractor()
        self.data_path = data_path
Example #2
    def __init__(self,
                 data_path=FloydDataPaths,
                 configuration=DEFAULT_EXTRACTOR_CONFIGURATION):
        self.floyd_data_producer = RmqProducer(EXCHANGES.DATA_PROVIDER.value)
        self.downloader = Downloader()
        self.parser = Parser()
        self.merger = Merger()
        self.corrector = Corrector()
        self.extractor = Extractor(configuration)
        self.data_path = data_path
        self.alive = False
Example #3
class DelaysProvider:
    def __init__(self, data_path=FloydDataPaths):
        self.delays_producer = RmqProducer(EXCHANGES.DELAYS_PROVIDER.value)
        self.downloader = Downloader()
        self.parser = VehiclePositionsParser()
        self.merger = VehiclePositionsMerger()
        self.extractor = DelaysExtractor()
        self.data_path = data_path

    def start(self):
        self.delays_producer.start()
        logger.info("DelaysProvider: has started.")
        while True:
            try:
                vehicle_positions_pb_T, vehicle_positions_pb_A = (
                    self.downloader.download_vehicle_positions())
                vehicle_positions_df_T = self.parser.parse(
                    vehicle_positions_pb_T)
                vehicle_positions_df_A = self.parser.parse(
                    vehicle_positions_pb_A)

                services_id_offset = len(
                    load_property_from_config_json("services")[0])
                vehicle_positions_df = self.merger.merge(
                    vehicle_positions_df_T, vehicle_positions_df_A,
                    services_id_offset)

                stop_times_df = pd.read_pickle(
                    self.data_path.stops_times_df.value)
                delays_df = self.extractor.extract(vehicle_positions_df,
                                                   stop_times_df)

                delays_df.to_pickle(self.data_path.delays_df.value)
                self.delays_producer.send_msg(
                    MESSAGES.DELAYS_UPDATED.value,
                    lost_stream_msg="Solvers are down.")
                logger.info("DelaysProvider: delays updated")
                time.sleep(60)
            except socket.gaierror:
                logger.warning(
                    "DelaysProvider: Can't download data: Internet connection lost."
                )
                time.sleep(60)
            except TimeoutError:
                logger.warning(
                    "DelaysProvider: Connection timeout while trying to download data."
                )
                time.sleep(60)
            except (TypeError, FileNotFoundError):
                logger.warning(
                    "DelaysProvider: Some pickles in the data directory are missing; this service won't "
                    "work without them. Wait for DataProvider to finish processing GTFS files."
                )
                time.sleep(60)
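
A minimal sketch of how this provider might be launched, assuming the imports used above resolve; the entry point itself is not part of the original example:

# Hypothetical entry point for DelaysProvider. start() blocks in its own
# `while True` polling loop, so it is the last call in the process.
if __name__ == "__main__":
    provider = DelaysProvider()  # uses the FloydDataPaths defaults
    provider.start()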
Example #4
class RmqSequenceSolver:
    def __init__(self):
        self.sequence_solver = SequenceSolver()
        self.query_consumer = RmqConsumer(EXCHANGES.SEQUENCE_QUERY.value,
                                          self.consume_query)
        self.results_producer = RmqProducer(
            EXCHANGES.FLASK_SERVER_SEQUENCE.value)

    def start(self):
        self.sequence_solver.start()
        self.results_producer.start()
        self.query_consumer.start()

    def stop(self):
        self.query_consumer.stop()
        self.results_producer.stop()
        self.sequence_solver.data_manager.stop()

    def consume_query(self, query: SequenceQuery):
        try:
            sequence = self.sequence_solver.find_best_sequence(query)
            self.results_producer.send_msg(sequence)
        except Exception as e:
            logger.error(
                f"SequenceSolver({id(self.sequence_solver)}) Error while searching for meeting points {e}"
            )
            self.results_producer.send_msg(
                SequenceResults(query.query_id,
                                ErrorCodes.INTERNAL_SERVER_ERROR.value, []))
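
A hedged usage sketch for this wrapper; that query_consumer.start() blocks while consuming is an assumption, suggested by the broker examples below running their consumers on separate threads:

# Hypothetical wiring: start the solver, then tear the RabbitMQ
# channels down on Ctrl+C.
solver = RmqSequenceSolver()
try:
    solver.start()  # assumed to block while consuming queries
except KeyboardInterrupt:
    solver.stop()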
Example #5
    def __init__(self, name):
        self.app = Flask(name)
        self.app.config['JSON_AS_ASCII'] = False
        self.query_id = Value('i', 0)
        self.cache = CacheDict(cache_len=1000)

        self.connection_producer = RmqProducer(
            EXCHANGES.CONNECTION_QUERY.value)
        self.meeting_producer = RmqProducer(EXCHANGES.MEETING_QUERY.value)
        self.sequence_producer = RmqProducer(EXCHANGES.SEQUENCE_QUERY.value)

        self.connection_consumer = RmqConsumer(
            EXCHANGES.FLASK_SERVER_CONNECTION.value,
            self.consume_rabbit_results)
        self.meeting_consumer = RmqConsumer(
            EXCHANGES.FLASK_SERVER_MEETING.value, self.consume_rabbit_results)
        self.sequence_consumer = RmqConsumer(
            EXCHANGES.FLASK_SERVER_SEQUENCE.value, self.consume_rabbit_results)

        self.connection_consumer_thread = Thread(
            target=self.connection_consumer.start, args=[])
        self.meeting_consumer_thread = Thread(
            target=self.meeting_consumer.start, args=[])
        self.sequence_consumer_thread = Thread(
            target=self.sequence_consumer.start, args=[])

        self.data_manager = FlaskDataManager()
        self.stops = None
        self.last_update_date = None
        self.limiter = Limiter(self.app,
                               key_func=get_remote_address,
                               default_limits=["100 per hour"])
        self.limiter.exempt(self.handle_get_query)
        self.limiter.exempt(self.handle_get_stops)
Example #6
class RmqConnectionSolver:
    def __init__(self):
        self.connection_solver = ConnectionSolver()
        self.query_consumer = RmqConsumer(EXCHANGES.CONNECTION_QUERY.value, self.consume_query)
        self.results_producer = RmqProducer(EXCHANGES.FLASK_SERVER_CONNECTION.value)

    def start(self):
        self.connection_solver.start()
        self.results_producer.start()
        self.query_consumer.start()

    def stop(self):
        self.query_consumer.stop()
        self.results_producer.stop()
        self.connection_solver.data_manager.stop()

    def consume_query(self, query: ConnectionQuery):
        connection_results = self.connection_solver.find_connections(query)
        self.results_producer.send_msg(connection_results)
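
For parity with RmqSequenceSolver in Example #4, a hedged error-handling variant of consume_query; ConnectionResults and its constructor arguments are assumptions by analogy with SequenceResults, not confirmed by this source:

    def consume_query(self, query: ConnectionQuery):
        try:
            connection_results = self.connection_solver.find_connections(query)
            self.results_producer.send_msg(connection_results)
        except Exception as e:
            logger.error(
                f"ConnectionSolver({id(self.connection_solver)}) "
                f"Error while searching for connections: {e}")
            # ConnectionResults is assumed by analogy with SequenceResults.
            self.results_producer.send_msg(
                ConnectionResults(query.query_id,
                                  ErrorCodes.INTERNAL_SERVER_ERROR.value, []))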
Example #7
    def __init__(self):
        self.connection_solver = ConnectionSolver()
        self.query_consumer = RmqConsumer(EXCHANGES.CONNECTION_QUERY.value, self.consume_query)
        self.results_producer = RmqProducer(EXCHANGES.FLASK_SERVER_CONNECTION.value)
Example #8
class RequestBroker:
    app = None

    def __init__(self, name):
        self.app = Flask(name)
        self.app.config['JSON_AS_ASCII'] = False
        self.query_id = Value('i', 0)
        self.cache = CacheDict(cache_len=1000)

        self.connection_producer = RmqProducer(
            EXCHANGES.CONNECTION_QUERY.value)
        self.meeting_producer = RmqProducer(EXCHANGES.MEETING_QUERY.value)
        self.sequence_producer = RmqProducer(EXCHANGES.SEQUENCE_QUERY.value)

        self.connection_consumer = RmqConsumer(
            EXCHANGES.FLASK_SERVER_CONNECTION.value,
            self.consume_rabbit_results)
        self.meeting_consumer = RmqConsumer(
            EXCHANGES.FLASK_SERVER_MEETING.value, self.consume_rabbit_results)
        self.sequence_consumer = RmqConsumer(
            EXCHANGES.FLASK_SERVER_SEQUENCE.value, self.consume_rabbit_results)

        self.connection_consumer_thread = Thread(
            target=self.connection_consumer.start, args=[])
        self.meeting_consumer_thread = Thread(
            target=self.meeting_consumer.start, args=[])
        self.sequence_consumer_thread = Thread(
            target=self.sequence_consumer.start, args=[])

        self.data_manager = FlaskDataManager()
        self.stops = None
        self.last_update_date = None
        self.limiter = Limiter(self.app,
                               key_func=get_remote_address,
                               default_limits=["100 per hour"])
        self.limiter.exempt(self.handle_get_query)
        self.limiter.exempt(self.handle_get_stops)

    def update_data(self):
        data = self.data_manager.get_updated_data()
        self.stops = data['stops']
        self.last_update_date = self.data_manager.last_data_update

    def run(self):
        self.app.run(threaded=True, host='0.0.0.0', port=5000)

    def add_endpoint(self,
                     endpoint=None,
                     endpoint_name=None,
                     handler=None,
                     methods=None):
        self.app.add_url_rule(endpoint,
                              endpoint_name,
                              handler,
                              methods=methods)

    def start(self):
        self.data_manager.start()
        if self.data_is_loaded():
            self.update_data()

        self.connection_producer.start()
        self.meeting_producer.start()
        self.sequence_producer.start()
        self.connection_consumer_thread.start()
        self.meeting_consumer_thread.start()
        self.sequence_consumer_thread.start()

        self.add_endpoint('/connection', 'connection',
                          self.handle_post_connection, ['POST'])
        self.add_endpoint('/meeting', 'meeting', self.handle_post_meeting,
                          ['POST'])
        self.add_endpoint('/sequence', 'sequence', self.handle_post_sequence,
                          ['POST'])
        self.add_endpoint('/result/<query_id>', 'results',
                          self.handle_get_query, ['GET'])
        self.add_endpoint('/stops', 'stops', self.handle_get_stops, ['GET'])

        logger.info('RequestBroker: started')
        self.run()

    def consume_rabbit_results(self, result):
        result["result"]["is_done"] = True
        self.cache[result["query_id"]] = {
            "result": result["result"],
            "error": result["error"]
        }

    def handle_get_query(self, query_id):
        try:
            query_id = int(query_id)
        except ValueError:
            return jsonify(ErrorCodes.BAD_QUERY_ID_TYPE.value), 400
        try:
            result = self.cache[query_id]
        except KeyError:
            return jsonify(ErrorCodes.BAD_QUERY_ID_VALUE.value), 400

        if result["result"][
                "is_done"] and result["error"] != ErrorCodes.OK.value:
            if result["error"] in [
                    ErrorCodes.INTERNAL_SERVER_ERROR.value,
                    ErrorCodes.INTERNAL_DATA_NOT_LOADED.value
            ]:
                return jsonify(result["error"]), 500
            return jsonify(result["error"]), 400
        return jsonify(result["result"]), 202

    def handle_get_stops(self):
        if not self.data_is_loaded():
            return jsonify(ErrorCodes.INTERNAL_DATA_NOT_LOADED.value), 500
        if self.last_update_date is None or self.last_update_date < self.data_manager.last_data_update:
            self.update_data()
        return jsonify(self.stops), 202

    def handle_post_connection(self):
        request_json = request.get_json()
        result = self.handle_query_post(
            self.connection_producer, request_json, ConnectionQuery,
            ErrorCodes.BAD_CONNECTION_JSON_FORMAT.value)
        return result

    def handle_post_meeting(self):
        request_json = request.get_json()
        result = self.handle_query_post(
            self.meeting_producer, request_json, MeetingQuery,
            ErrorCodes.BAD_MEETING_JSON_FORMAT.value)
        return result

    def handle_post_sequence(self):
        request_json = request.get_json()
        result = self.handle_query_post(
            self.sequence_producer, request_json, SequenceQuery,
            ErrorCodes.BAD_SEQUENCE_JSON_FORMAT.value)
        return result

    def handle_query_post(self, producer, request_json, query_class,
                          parsing_error_message):
        if not query_class.validate(request_json):
            return jsonify(parsing_error_message), 400
        with self.query_id.get_lock():
            self.query_id.value += 1
            query_id = self.query_id.value
        request_json["query_id"] = query_id
        producer.send_msg(request_json)
        self.cache[query_id] = {"result": {"is_done": False}}
        return jsonify({"query_id": query_id}), 202

    def data_is_loaded(self):
        if self.data_manager.data_loaded:
            return True
        else:
            logger.warning(
                "RequestBroker: Some pickles in the data directory are missing; this service won't "
                "work without them. Wait for DataProvider to finish processing GTFS files."
            )
            return False
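
A minimal launch sketch for the broker, assuming it runs as a script; __name__ is simply forwarded to the Flask constructor:

# Hypothetical entry point: start() registers the HTTP endpoints, starts
# the RabbitMQ producers and consumer threads, then blocks in app.run().
if __name__ == "__main__":
    broker = RequestBroker(__name__)
    broker.start()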
Example #9
    def __init__(self):
        self.sequence_solver = SequenceSolver()
        self.query_consumer = RmqConsumer(EXCHANGES.SEQUENCE_QUERY.value,
                                          self.consume_query)
        self.results_producer = RmqProducer(
            EXCHANGES.FLASK_SERVER_SEQUENCE.value)
Example #10
class DataProvider:
    def __init__(self,
                 data_path=FloydDataPaths,
                 configuration=DEFAULT_EXTRACTOR_CONFIGURATION):
        self.floyd_data_producer = RmqProducer(EXCHANGES.DATA_PROVIDER.value)
        self.downloader = Downloader()
        self.parser = Parser()
        self.merger = Merger()
        self.corrector = Corrector()
        self.extractor = Extractor(configuration)
        self.data_path = data_path
        self.alive = False

    def start(self):
        self.floyd_data_producer.start()
        self.alive = True
        logger.info("DataProvider: has started.")
        while self.alive:
            try:
                new_update_date = self.downloader.get_last_update_time()
                last_update_date = self.load_update_date()
                if last_update_date is None or new_update_date > last_update_date:
                    self.process_data()
                    save_property_to_config_json(
                        "update_date",
                        new_update_date.strftime("%Y-%m-%d %H:%M:%S"))
                    self.floyd_data_producer.send_msg(
                        MESSAGES.DATA_UPDATED.value,
                        lost_stream_msg="Solvers are down.")
                time.sleep(60)
            except socket.gaierror:
                logger.warning(
                    "DataProvider: Can't download data: Internet connection lost."
                )
                time.sleep(60)
            except TimeoutError:
                logger.warning(
                    "DataProvider: Connection timeout while trying to download data."
                )
                time.sleep(60)

    def stop(self):
        self.floyd_data_producer.stop()
        self.alive = False

    @staticmethod
    def load_update_date():
        last_update = load_property_from_config_json("update_date")
        if last_update is None:
            return None
        return datetime.strptime(last_update, DATETIME_FORMAT)

    def process_data(self):
        logger.info("DataProvider: updating data")
        gtfs_zip_T, gtfs_zip_A = self.downloader.download_gtfs_static_data()
        parsed_data_T = self.parser.parse(gtfs_zip_T)
        parsed_data_A = self.parser.parse(gtfs_zip_A)
        logger.info("DataProvider: data parsed")

        merged_data, service_id_offset = self.merger.merge(
            parsed_data_T, parsed_data_A)
        logger.info("DataProvider: data merged")

        corrected_data = self.corrector.correct(merged_data)
        logger.info("DataProvider: data corrected")

        save_property_to_config_json("services", [
            list(parsed_data_T.calendar_df.index),
            list(parsed_data_A.calendar_df.index + service_id_offset)
        ])

        extracted_data = self.extractor.extract(corrected_data)
        logger.info("DataProvider: data extracted")

        extracted_data.save(self.data_path)
        logger.info("DataProvider: data saved")