def test_load_default_if_file_not_exists(self, tmpdir):
    """Loading from "does_not_exist.yaml" under tmpdir yields a default state.

    The returned state must be truthy and must carry no sync timestamp for
    either direction.
    """
    missing_path = unicode(tmpdir.join("does_not_exist.yaml"))
    loaded = SyncStateStore(missing_path).load()

    assert loaded
    assert loaded.last_cassandra_to_elasticsearch_sync is None
    assert loaded.last_elasticsearch_to_cassandra_sync is None
def __init__(self, cassandra_cluster, elasticsearch_client, settings,
             state_file_name):
    """Set up the logger, the state store and one river per sync direction.

    :param cassandra_cluster: forwarded to both rivers.
    :param elasticsearch_client: forwarded to both rivers.
    :param settings: supplies ``interval_between_runs``; also forwarded to
        both rivers.
    :param state_file_name: handed to ``SyncStateStore``.
    """
    # No name is given, so this is the root logger.
    self._logger = logging.getLogger()
    self._state_store = SyncStateStore(state_file_name)
    self._interval_between_runs = settings.interval_between_runs

    # The two rivers receive the same objects; only the order of the
    # cluster/client arguments is swapped.
    to_elasticsearch = CassandraToElasticsearchRiver(
        cassandra_cluster, elasticsearch_client, settings)
    self._cassandra_to_elasticsearch_river = to_elasticsearch

    to_cassandra = ElasticsearchToCassandraRiver(
        elasticsearch_client, cassandra_cluster, settings)
    self._elasticsearch_to_cassandra_river = to_cassandra
def test_load_from_existing_file_then_save_none(self, tmpdir, existing_filename):
    """Clearing both timestamps and saving must leave them ``None`` on reload.

    Works on a copy of the fixture file (made with ``copy_file``) rather than
    on ``existing_filename`` itself.
    """
    working_copy = copy_file(tmpdir, existing_filename)
    store = SyncStateStore(working_copy)

    cleared = store.load()
    cleared.last_cassandra_to_elasticsearch_sync = None
    cleared.last_elasticsearch_to_cassandra_sync = None
    cleared.save()

    reloaded = store.load()
    assert reloaded.last_cassandra_to_elasticsearch_sync is None
    assert reloaded.last_elasticsearch_to_cassandra_sync is None
def test_load_from_existing_file(self, existing_filename):
    """Check both timestamps read from the fixture file.

    Every expected instant is spelled twice, once with a -03:00 offset and
    once with a -00:00 offset; the loaded value must equal the float
    timestamp of both spellings.
    """
    loaded = SyncStateStore(existing_filename).load()
    assert loaded

    elasticsearch_to_cassandra_instants = (
        arrow.get("2015-01-23T15:01:22.654321-03:00", DATE_FORMAT),
        arrow.get("2015-01-23T18:01:22.654321-00:00", DATE_FORMAT),
    )
    for instant in elasticsearch_to_cassandra_instants:
        assert loaded.last_elasticsearch_to_cassandra_sync == instant.float_timestamp

    cassandra_to_elasticsearch_instants = (
        arrow.get("2015-01-23T14:55:33.123456-03:00", DATE_FORMAT),
        arrow.get("2015-01-23T17:55:33.123456-00:00", DATE_FORMAT),
    )
    for instant in cassandra_to_elasticsearch_instants:
        assert loaded.last_cassandra_to_elasticsearch_sync == instant.float_timestamp
def test_load_default_from_non_existent_file_them_save(self, tmpdir):
    """Timestamps set on a state loaded from a random path survive save + reload.

    The path is built from ``random_filename()`` under tmpdir (presumably not
    yet existing, as the test name suggests).
    """
    store = SyncStateStore(unicode(tmpdir.join(random_filename())))
    fresh = store.load()

    cassandra_to_es = arrow.get("2015-01-23T14:55:33.123456-03:00", DATE_FORMAT)
    fresh.last_cassandra_to_elasticsearch_sync = cassandra_to_es.timestamp
    es_to_cassandra = arrow.get("2015-01-23T15:01:22.654321-03:00", DATE_FORMAT)
    fresh.last_elasticsearch_to_cassandra_sync = es_to_cassandra.timestamp
    fresh.save()

    reloaded = store.load()
    assert reloaded.last_cassandra_to_elasticsearch_sync == cassandra_to_es.timestamp
    assert reloaded.last_elasticsearch_to_cassandra_sync == es_to_cassandra.timestamp
def test_load_from_existing_file_them_save(self, tmpdir, existing_filename):
    """Timestamps written over a copy of the fixture file survive save + reload."""
    store = SyncStateStore(copy_file(tmpdir, existing_filename))
    current = store.load()

    cassandra_to_es = arrow.get("2015-01-23T14:55:33.123456-03:00", DATE_FORMAT)
    current.last_cassandra_to_elasticsearch_sync = cassandra_to_es.timestamp
    es_to_cassandra = arrow.get("2015-01-23T15:01:22.654321-03:00", DATE_FORMAT)
    current.last_elasticsearch_to_cassandra_sync = es_to_cassandra.timestamp
    current.save()

    reloaded = store.load()
    assert reloaded.last_cassandra_to_elasticsearch_sync == cassandra_to_es.timestamp
    assert reloaded.last_elasticsearch_to_cassandra_sync == es_to_cassandra.timestamp
def test_load_default_from_non_existent_file_them_save(self, tmpdir):
    """Save two timestamps on a state loaded from a random path, then reload.

    The path comes from ``random_filename()`` under tmpdir (presumably not yet
    existing, as the test name suggests). After ``save()`` a second ``load()``
    must return the same integer timestamps.
    """
    target_path = unicode(tmpdir.join(random_filename()))
    store = SyncStateStore(target_path)
    fresh = store.load()

    to_elasticsearch_at = arrow.get(
        "2015-01-23T14:55:33.123456-03:00", DATE_FORMAT)
    fresh.last_cassandra_to_elasticsearch_sync = to_elasticsearch_at.timestamp
    to_cassandra_at = arrow.get(
        "2015-01-23T15:01:22.654321-03:00", DATE_FORMAT)
    fresh.last_elasticsearch_to_cassandra_sync = to_cassandra_at.timestamp
    fresh.save()

    reloaded = store.load()
    assert reloaded.last_cassandra_to_elasticsearch_sync == to_elasticsearch_at.timestamp
    assert reloaded.last_elasticsearch_to_cassandra_sync == to_cassandra_at.timestamp
def test_load_from_existing_file_them_save(self, tmpdir, existing_filename):
    """Overwrite both timestamps on a copy of the fixture file, then reload.

    After ``save()`` a second ``load()`` must return the same integer
    timestamps that were assigned.
    """
    working_copy = copy_file(tmpdir, existing_filename)
    store = SyncStateStore(working_copy)
    current = store.load()

    to_elasticsearch_at = arrow.get(
        "2015-01-23T14:55:33.123456-03:00", DATE_FORMAT)
    current.last_cassandra_to_elasticsearch_sync = to_elasticsearch_at.timestamp
    to_cassandra_at = arrow.get(
        "2015-01-23T15:01:22.654321-03:00", DATE_FORMAT)
    current.last_elasticsearch_to_cassandra_sync = to_cassandra_at.timestamp
    current.save()

    reloaded = store.load()
    assert reloaded.last_cassandra_to_elasticsearch_sync == to_elasticsearch_at.timestamp
    assert reloaded.last_elasticsearch_to_cassandra_sync == to_cassandra_at.timestamp
def test_load_from_existing_file(self, existing_filename):
    """Verify the two timestamps stored in the fixture file.

    Each expected instant is parsed from two strings, one with a -03:00
    offset and one with a -00:00 offset; the loaded value is compared with the
    float timestamp of each.
    """
    loaded = SyncStateStore(existing_filename).load()
    assert loaded

    to_cassandra_expected = [
        arrow.get("2015-01-23T15:01:22.654321-03:00", DATE_FORMAT),
        arrow.get("2015-01-23T18:01:22.654321-00:00", DATE_FORMAT),
    ]
    for expected in to_cassandra_expected:
        assert loaded.last_elasticsearch_to_cassandra_sync == expected.float_timestamp

    to_elasticsearch_expected = [
        arrow.get("2015-01-23T14:55:33.123456-03:00", DATE_FORMAT),
        arrow.get("2015-01-23T17:55:33.123456-00:00", DATE_FORMAT),
    ]
    for expected in to_elasticsearch_expected:
        assert loaded.last_cassandra_to_elasticsearch_sync == expected.float_timestamp
class SyncLoop:
    """Runs the Cassandra->Elasticsearch and Elasticsearch->Cassandra rivers in a loop.

    The last sync timestamp of each direction is kept on a state object
    obtained from ``SyncStateStore.load()`` and saved after every river run.
    """

    def __init__(self, cassandra_cluster, elasticsearch_client, settings, state_file_name):
        """Create the state store and one river per direction.

        :param cassandra_cluster: forwarded to both rivers.
        :param elasticsearch_client: forwarded to both rivers.
        :param settings: supplies ``interval_between_runs``; also forwarded to
            both rivers.
        :param state_file_name: handed to ``SyncStateStore``.
        """
        self._logger = logging.getLogger()
        self._state_store = SyncStateStore(state_file_name)
        self._interval_between_runs = settings.interval_between_runs
        self._cassandra_to_elasticsearch_river = CassandraToElasticsearchRiver(
            cassandra_cluster, elasticsearch_client, settings)
        self._elasticsearch_to_cassandra_river = ElasticsearchToCassandraRiver(
            elasticsearch_client, cassandra_cluster, settings)

    def run(self):
        """Load the state, do initial syncs if needed, then sync incrementally forever.

        Sleeps ``interval_between_runs`` after each incremental round. Any
        ``Exception`` ends the loop: it is logged at ERROR level together with
        its traceback, followed by "Aborting...", and the method returns.
        """
        try:
            state = self._state_store.load()
            self.__initial_sync_if_necessary(state)
            while True:
                self.__incremental_sync(state)
                sleep(self._interval_between_runs)
        except Exception as e:
            # logger.exception keeps the same message text as str(e) but also
            # records the exception type and traceback; formatting is deferred
            # to the logging machinery instead of being done in the handler.
            self._logger.exception("%s", e)
            self._logger.error("Aborting...")

    def __initial_sync_if_necessary(self, state):
        """Run the initial sync for each direction whose last-sync value is falsy."""
        if not state.last_cassandra_to_elasticsearch_sync:
            self.__initial_cassandra_to_elasticsearch_sync(state)
        if not state.last_elasticsearch_to_cassandra_sync:
            self.__initial_elasticsearch_to_cassandra_sync(state)

    def __initial_cassandra_to_elasticsearch_sync(self, state):
        """Propagate updates with no starting timestamp and save the result."""
        self._logger.warning(
            "Initial Cassandra to Elasticsearch sync. This might take a while...")
        state.last_cassandra_to_elasticsearch_sync = (
            self._cassandra_to_elasticsearch_river.propagate_updates())
        state.save()

    def __initial_elasticsearch_to_cassandra_sync(self, state):
        """Propagate updates with no starting timestamp and save the result."""
        self._logger.warning(
            "Initial Elasticsearch to Cassandra sync. This will take a while...")
        state.last_elasticsearch_to_cassandra_sync = (
            self._elasticsearch_to_cassandra_river.propagate_updates())
        state.save()

    def __incremental_sync(self, state):
        """Sync both directions, pausing ``_INTERVAL_BETWEEN_RIVER_SYNCS`` in between."""
        self.__incremental_cassandra_to_elasticsearch_sync(state)
        sleep(_INTERVAL_BETWEEN_RIVER_SYNCS)
        self.__incremental_elasticsearch_to_cassandra_sync(state)

    def __incremental_cassandra_to_elasticsearch_sync(self, state):
        """Propagate updates since the stored timestamp, then store and save the new one."""
        self._logger.info(
            "Syncing Cassandra to Elasticsearch since %s...",
            self.__format_timestamp(state.last_cassandra_to_elasticsearch_sync))
        timestamp = self._cassandra_to_elasticsearch_river.propagate_updates(
            state.last_cassandra_to_elasticsearch_sync)
        state.last_cassandra_to_elasticsearch_sync = timestamp
        state.save()
        self._logger.info("...synced until %s.", self.__format_timestamp(timestamp))

    def __incremental_elasticsearch_to_cassandra_sync(self, state):
        """Propagate updates since the stored timestamp, then store and save the new one."""
        self._logger.info(
            "Syncing Elasticsearch to Cassandra since %s...",
            self.__format_timestamp(state.last_elasticsearch_to_cassandra_sync))
        timestamp = self._elasticsearch_to_cassandra_river.propagate_updates(
            state.last_elasticsearch_to_cassandra_sync)
        state.last_elasticsearch_to_cassandra_sync = timestamp
        state.save()
        self._logger.info("...synced until %s.", self.__format_timestamp(timestamp))

    @staticmethod
    def __format_timestamp(timestamp):
        """Render ``timestamp`` via arrow, converted to "local" time, for log messages."""
        return arrow.get(timestamp).to("local").format("YYYY-MM-DD HH:mm:ss.SSSSSSZZ")
class SyncLoop:
    """Alternates Cassandra->Elasticsearch and Elasticsearch->Cassandra river runs.

    Per-direction last-sync timestamps live on a state object returned by
    ``SyncStateStore.load()``; the state is saved after every river run.
    """

    def __init__(self, cassandra_cluster, elasticsearch_client, settings, state_file_name):
        """Create the state store and one river per direction.

        :param cassandra_cluster: forwarded to both rivers.
        :param elasticsearch_client: forwarded to both rivers.
        :param settings: supplies ``interval_between_runs``; also forwarded to
            both rivers.
        :param state_file_name: handed to ``SyncStateStore``.
        """
        self._logger = logging.getLogger()
        self._state_store = SyncStateStore(state_file_name)
        self._interval_between_runs = settings.interval_between_runs
        self._cassandra_to_elasticsearch_river = CassandraToElasticsearchRiver(
            cassandra_cluster, elasticsearch_client, settings)
        self._elasticsearch_to_cassandra_river = ElasticsearchToCassandraRiver(
            elasticsearch_client, cassandra_cluster, settings)

    def run(self):
        """Load the state, do initial syncs if needed, then sync incrementally forever.

        Sleeps ``interval_between_runs`` after each incremental round. Any
        ``Exception`` ends the loop: it is logged at ERROR level together with
        its traceback, followed by "Aborting...", and the method returns.
        """
        try:
            state = self._state_store.load()
            self.__initial_sync_if_necessary(state)
            while True:
                self.__incremental_sync(state)
                sleep(self._interval_between_runs)
        except Exception as e:
            # logger.exception keeps the same message text as str(e) but also
            # records the exception type and traceback; formatting is deferred
            # to the logging machinery instead of being done in the handler.
            self._logger.exception("%s", e)
            self._logger.error("Aborting...")

    def __initial_sync_if_necessary(self, state):
        """Run the initial sync for each direction whose last-sync value is falsy."""
        if not state.last_cassandra_to_elasticsearch_sync:
            self.__initial_cassandra_to_elasticsearch_sync(state)
        if not state.last_elasticsearch_to_cassandra_sync:
            self.__initial_elasticsearch_to_cassandra_sync(state)

    def __initial_cassandra_to_elasticsearch_sync(self, state):
        """Propagate updates with no starting timestamp and save the result."""
        self._logger.warning("Initial Cassandra to Elasticsearch sync. This might take a while...")
        state.last_cassandra_to_elasticsearch_sync = self._cassandra_to_elasticsearch_river.propagate_updates()
        state.save()

    def __initial_elasticsearch_to_cassandra_sync(self, state):
        """Propagate updates with no starting timestamp and save the result."""
        self._logger.warning("Initial Elasticsearch to Cassandra sync. This will take a while...")
        state.last_elasticsearch_to_cassandra_sync = self._elasticsearch_to_cassandra_river.propagate_updates()
        state.save()

    def __incremental_sync(self, state):
        """Sync both directions, pausing ``_INTERVAL_BETWEEN_RIVER_SYNCS`` in between."""
        self.__incremental_cassandra_to_elasticsearch_sync(state)
        sleep(_INTERVAL_BETWEEN_RIVER_SYNCS)
        self.__incremental_elasticsearch_to_cassandra_sync(state)

    def __incremental_cassandra_to_elasticsearch_sync(self, state):
        """Propagate updates since the stored timestamp, then store and save the new one."""
        self._logger.info("Syncing Cassandra to Elasticsearch since %s...",
                          self.__format_timestamp(state.last_cassandra_to_elasticsearch_sync))
        timestamp = self._cassandra_to_elasticsearch_river.propagate_updates(
            state.last_cassandra_to_elasticsearch_sync)
        state.last_cassandra_to_elasticsearch_sync = timestamp
        state.save()
        self._logger.info("...synced until %s.", self.__format_timestamp(timestamp))

    def __incremental_elasticsearch_to_cassandra_sync(self, state):
        """Propagate updates since the stored timestamp, then store and save the new one."""
        self._logger.info("Syncing Elasticsearch to Cassandra since %s...",
                          self.__format_timestamp(state.last_elasticsearch_to_cassandra_sync))
        timestamp = self._elasticsearch_to_cassandra_river.propagate_updates(
            state.last_elasticsearch_to_cassandra_sync)
        state.last_elasticsearch_to_cassandra_sync = timestamp
        state.save()
        self._logger.info("...synced until %s.", self.__format_timestamp(timestamp))

    @staticmethod
    def __format_timestamp(timestamp):
        """Render ``timestamp`` via arrow, converted to "local" time, for log messages."""
        return arrow.get(timestamp).to("local").format("YYYY-MM-DD HH:mm:ss.SSSSSSZZ")