Example #1
    def _poll_feed(self, model_builder):
        contributor = (
            model.Contributor.query_existing()
            .filter_by(id=self.config.get("contributor"), connector_type=self.connector_type)
            .first()
        )
        try:
            response = self._retrieve_feed()
            response.raise_for_status()
            # We changed the way we use ETag to check if a new feed has to be processed.
            # see _retrieve_feed function
            # The 304 status code means the feed did not change
            if response.status_code == 304:
                new_relic.ignore_transaction()
                manage_db_no_new(contributor_id=contributor.id)
                return
            elif response.status_code == 200:
                # we store the etag from the response, as it has changed
                self._update_redis_etag(response)
        except Exception as e:
            manage_db_error(
                data="",
                contributor_id=contributor.id,
                error="Http Error",
                is_reprocess_same_data_allowed=True,
            )
            logger = logging.LoggerAdapter(logging.getLogger(__name__), extra={"contributor": contributor.id})
            logger.debug(str(e))
            return

        wrap_build(model_builder(contributor), response.content)
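
The ETag round-trip that _retrieve_feed and _update_redis_etag implement is not shown above. The sketch below is a hypothetical reconstruction, not Kirin's actual API (the redis_client handle and the key layout are assumptions): send the cached ETag as If-None-Match so the server can answer 304, and cache the new ETag whenever a 200 comes back.

import requests

class FeedPollerSketch(object):
    def __init__(self, redis_client, feed_url, contributor_id):
        self.redis = redis_client  # assumed redis-py client created with decode_responses=True
        self.feed_url = feed_url
        self.contributor_id = contributor_id

    def _etag_key(self):
        # one cached ETag per contributor feed (key layout is an assumption)
        return "etag:{}".format(self.contributor_id)

    def _retrieve_feed(self):
        headers = {}
        etag = self.redis.get(self._etag_key())
        if etag:
            # the server answers 304 when the feed behind this ETag is unchanged
            headers["If-None-Match"] = etag
        return requests.get(self.feed_url, headers=headers, timeout=1)

    def _update_redis_etag(self, response):
        etag = response.headers.get("ETag")
        if etag:
            self.redis.set(self._etag_key(), etag)
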
Example #2
def gtfs_poller(self, config):
    func_name = "gtfs_poller"
    contributor = (
        model.Contributor.query_existing()
        .filter_by(id=config.get("contributor"), connector_type=ConnectorType.gtfs_rt.value)
        .first()
    )

    logger = logging.LoggerAdapter(logging.getLogger(__name__), extra={"contributor": contributor.id})

    lock_name = make_kirin_lock_name(func_name, contributor.id)
    with get_lock(logger, lock_name, app.config["REDIS_LOCK_TIMEOUT_POLLER"]) as locked:
        if not locked or not config.get("feed_url"):
            new_relic.ignore_transaction()
            return

        retrieval_interval = config.get("retrieval_interval", 10)
        if _is_last_call_too_recent(func_name, contributor.id, retrieval_interval):
            # do nothing if the last call is too recent
            new_relic.ignore_transaction()
            return

        logger.debug("polling of %s", config.get("feed_url"))

        # We do a HEAD request at the very beginning of polling and compare it with the previous one to check
        # whether the gtfs-rt has changed.
        # If the HEAD request or Redis get/set fail, we just ignore this part and do the polling anyway
        if not _is_newer(config):
            new_relic.ignore_transaction()
            manage_db_no_new(connector_type=ConnectorType.gtfs_rt.value, contributor_id=contributor.id)
            return

        try:
            response = _retrieve_gtfsrt(config)
            response.raise_for_status()
        except Exception as e:
            manage_db_error(
                data="",
                connector_type=ConnectorType.gtfs_rt.value,
                contributor_id=contributor.id,
                error="Http Error",
                is_reprocess_same_data_allowed=True,
            )
            logger.debug(six.text_type(e))
            return

        wrap_build(KirinModelBuilder(contributor), response.content)
        logger.info("%s for %s is finished", func_name, contributor.id)
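
_is_last_call_too_recent is not shown in this snippet either. One plausible implementation, sketched below under the assumption of a shared redis-py client (redis_client is a name invented for the sketch), is a "SET NX EX" marker that expires after retrieval_interval seconds:

import redis

redis_client = redis.StrictRedis(decode_responses=True)  # assumed shared client

def _is_last_call_too_recent(func_name, contributor_id, retrieval_interval):
    key = "last_call:{}:{}".format(func_name, contributor_id)
    # SET NX EX: only succeeds when no marker exists, and the marker
    # expires by itself after retrieval_interval seconds
    created = redis_client.set(key, "1", nx=True, ex=retrieval_interval)
    # set() returns None when the key already exists, i.e. the previous
    # call happened less than retrieval_interval seconds ago
    return created is None
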
Example #3
    def build_rt_update(self, input_raw):
        # create a raw gtfs-rt obj, save the raw protobuf into the db
        proto = gtfs_realtime_pb2.FeedMessage()
        log_dict = {}
        try:
            proto.ParseFromString(input_raw)
        except DecodeError:
            # We save the non-decodable gtfs-rt feed
            rt_update = manage_db_error(
                input_raw.encode("string_escape", "ignore"),  # protect for PostgreSQL "Text" type
                ConnectorType.gtfs_rt.value,
                contributor_id=self.contributor.id,
                error="invalid protobuf",
                is_reprocess_same_data_allowed=False,
            )
            return rt_update, log_dict

        feed = six.binary_type(proto)  # temp, for the moment, we save the protobuf as text
        rt_update = make_rt_update(
            feed,
            connector_type=self.contributor.connector_type,
            contributor_id=self.contributor.id)
        rt_update.proto = proto

        return rt_update, log_dict
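
To see both branches of build_rt_update in action, here is a quick round-trip (assuming gtfs_realtime_pb2 comes from the gtfs-realtime-bindings package, as in the upstream distribution): a serialized FeedMessage parses cleanly, while arbitrary bytes raise the DecodeError that ends up in manage_db_error.

from google.transit import gtfs_realtime_pb2
from google.protobuf.message import DecodeError

feed = gtfs_realtime_pb2.FeedMessage()
feed.header.gtfs_realtime_version = "1.0"  # required field of the header
valid_raw = feed.SerializeToString()

proto = gtfs_realtime_pb2.FeedMessage()
proto.ParseFromString(valid_raw)  # parses fine: the happy path above

try:
    proto.ParseFromString(b"not a protobuf")
except DecodeError:
    print("invalid protobuf")  # this is the manage_db_error branch
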
Example #4
def gtfs_poller(self, config):
    func_name = 'gtfs_poller'
    logger = logging.LoggerAdapter(logging.getLogger(__name__), extra={'contributor': config['contributor']})
    logger.debug('polling of %s', config['feed_url'])

    contributor = config['contributor']
    lock_name = make_kirin_lock_name(func_name, contributor)
    with get_lock(logger, lock_name, app.config['REDIS_LOCK_TIMEOUT_POLLER']) as locked:
        if not locked:
            new_relic.ignore_transaction()
            return

        # We do a HEAD request at the very beginning of polling and compare it with the previous one to check
        # whether the gtfs-rt has changed.
        # If the HEAD request or Redis get/set fail, we just ignore this part and do the polling anyway
        if not _is_newer(config):
            new_relic.ignore_transaction()
            manage_db_no_new(connector='gtfs-rt', contributor=contributor)
            return

        try:
            response = requests.get(config['feed_url'], timeout=config.get('timeout', 1))
            response.raise_for_status()

        except Exception as e:
            manage_db_error(data='', connector='gtfs-rt', contributor=contributor,
                            status='KO', error='Http Error')
            logger.debug(str(e))
            return

        nav = navitia_wrapper.Navitia(url=config['navitia_url'],
                                      token=config['token'],
                                      timeout=5,
                                      cache=redis,
                                      query_timeout=app.config.get('NAVITIA_QUERY_CACHE_TIMEOUT', 600),
                                      pubdate_timeout=app.config.get('NAVITIA_PUBDATE_CACHE_TIMEOUT', 600))\
            .instance(config['coverage'])

        proto = gtfs_realtime_pb2.FeedMessage()
        try:
            proto.ParseFromString(response.content)
        except DecodeError:
            manage_db_error(proto, 'gtfs-rt', contributor=contributor, status='KO', error='Decode Error')
            logger.debug('invalid protobuf')
        else:
            model_maker.handle(proto, nav, contributor)
            logger.info('%s for %s is finished', func_name, contributor)
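
_is_newer is opaque here too; the shape below is an assumption consistent with the comment in the code (a HEAD request compared against a value cached in Redis, with any failure meaning "poll anyway"). The feed_tag key naming is invented for the sketch, and the client stands in for the one the poller already passes to navitia_wrapper, created with decode_responses=True so header strings compare cleanly.

import redis as redis_lib
import requests

redis = redis_lib.StrictRedis(decode_responses=True)  # stand-in for the existing client

def _is_newer(config):
    try:
        head = requests.head(config["feed_url"], timeout=config.get("timeout", 1))
        new_tag = head.headers.get("Last-Modified") or head.headers.get("ETag")
        if not new_tag:
            return True  # nothing to compare with, poll anyway
        key = "feed_tag:{}".format(config["contributor"])
        old_tag = redis.get(key)
        redis.set(key, new_tag)
        return old_tag != new_tag
    except Exception:
        # if the HEAD request or the Redis get/set fail, ignore this part
        # and do the polling anyway, as the comment above says
        return True
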
Example #5
    def post(self):
        raw_proto = _get_gtfs_rt(flask.globals.request)

        from kirin import gtfs_realtime_pb2
        # create a raw gtfs-rt obj, save the raw protobuf into the db
        proto = gtfs_realtime_pb2.FeedMessage()
        try:
            proto.ParseFromString(raw_proto)
        except DecodeError:
            # We save the non-decodable gtfs-rt feed
            manage_db_error(proto,
                            'gtfs-rt',
                            contributor=self.contributor,
                            status='KO',
                            error='Decode Error')
            raise InvalidArguments('invalid protobuf')
        else:
            model_maker.handle(proto, self.navitia_wrapper, self.contributor)
            return 'OK', 200
Example #6
    def build_rt_update(self, input_raw):
        log_dict = {}
        try:
            ElementTree.fromstring(input_raw)
        except ParseError:
            # We save the non-decodable xml feed
            rt_update = manage_db_error(
                str(input_raw, encoding="utf-8", errors="replace"),  # protect for PostgreSQL "Text" type
                contributor_id=self.contributor.id,
                error="invalid xml",
                is_reprocess_same_data_allowed=False,
            )
            return rt_update, log_dict

        rt_update = make_rt_update(input_raw,
                                   contributor_id=self.contributor.id)
        return rt_update, log_dict
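
The str(input_raw, encoding="utf-8", errors="replace") call above is what keeps a broken feed storable in a PostgreSQL "Text" column. A small self-contained illustration (the sample payload is made up):

import xml.etree.ElementTree as ElementTree
from xml.etree.ElementTree import ParseError

input_raw = b"<siri>\xff</siri>"  # \xff is not valid UTF-8
try:
    ElementTree.fromstring(input_raw)
except ParseError:
    # the invalid byte becomes U+FFFD instead of breaking the DB insert
    safe_text = str(input_raw, encoding="utf-8", errors="replace")
    print(safe_text)
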
Example #7
    def build_rt_update(self, input_raw):
        # create a raw gtfs-rt obj, save the raw protobuf into the db (as text)
        proto = gtfs_realtime_pb2.FeedMessage()
        log_dict = {}
        try:
            proto.ParseFromString(input_raw)
        except DecodeError:
            # We save the non-decodable gtfs-rt feed
            rt_update = manage_db_error(
                str(input_raw, encoding="utf-8", errors="replace"),  # protect for PostgreSQL "Text" type
                contributor_id=self.contributor.id,
                error="invalid protobuf",
                is_reprocess_same_data_allowed=False,
            )
            return rt_update, log_dict

        feed = str(proto)  # temp, for the moment, we save the protobuf as text
        rt_update = make_rt_update(feed, contributor_id=self.contributor.id)
        rt_update.proto = proto

        return rt_update, log_dict
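
As the "temp" comment admits, str(proto) stores the protobuf text format rather than the binary wire format. The snippet below contrasts the two representations (again assuming gtfs_realtime_pb2 from gtfs-realtime-bindings):

from google.transit import gtfs_realtime_pb2

proto = gtfs_realtime_pb2.FeedMessage()
proto.header.gtfs_realtime_version = "2.0"

text_repr = str(proto)                   # human-readable text format
binary_repr = proto.SerializeToString()  # compact wire format
print(repr(text_repr))   # 'header {\n  gtfs_realtime_version: "2.0"\n}\n'
print(len(binary_repr))  # a handful of bytes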